diff --git a/linkerd.io/.forestry/front_matter/templates/blog-detail-page.yml b/linkerd.io/.forestry/front_matter/templates/blog-detail-page.yml deleted file mode 100644 index 9870eb5ed7..0000000000 --- a/linkerd.io/.forestry/front_matter/templates/blog-detail-page.yml +++ /dev/null @@ -1,45 +0,0 @@ ---- -label: Blogpost -hide_body: false -fields: -- type: text - name: title - label: Title - config: - required: true -- type: tag_list - name: tags - label: Tags -- type: select - name: author - config: - source: - type: documents - file: linkerd.io/data/authors.json - section: authors - label: Author -- type: datetime - label: Date - name: date - config: - time_format: " " - date_format: MM/DD/YYYY - export_format: YYYY-MM-DD -- type: file - name: thumbnail - label: Thumbnail - default: '' -- type: file - label: Featured Thumbnail - name: feature -- type: boolean - label: Featured Blog - name: featured -pages: -- linkerd.io/content/blog/december-linkerd-community-meeting.md -- linkerd.io/content/blog/january-2021-linkerd-hero-nomination.md -- linkerd.io/content/blog/october-linkerd-community-meeting.md -- linkerd.io/content/blog/topology-aware-service-routing-on-kubernetes-with-linkerd.md -- linkerd.io/content/blog/under-the-hood-of-linkerd-s-state-of-the-art-rust-proxy-linkerd2-proxy.md -- linkerd.io/content/blog/vote-for-your-december-linkerd-hero.md -- linkerd.io/content/dashboard/september-linkerd-online-community-meeting.md diff --git a/linkerd.io/.forestry/front_matter/templates/dashboard.yml b/linkerd.io/.forestry/front_matter/templates/dashboard.yml deleted file mode 100644 index af78fc26cf..0000000000 --- a/linkerd.io/.forestry/front_matter/templates/dashboard.yml +++ /dev/null @@ -1,23 +0,0 @@ ---- -label: Dashboard -hide_body: false -fields: -- name: date - type: datetime - label: Date - description: '' - config: - required: true - date_format: MM/DD/YYYY - time_format: - display_utc: false - export_format: YYYY-MM-DD -- name: title - type: text - config: - required: true - label: Title -pages: -- linkerd.io/content/dashboard/january-linkerd-online-community-meeting.md -- linkerd.io/content/dashboard/october-linkerd-online-community-meeting.md -- linkerd.io/content/dashboard/the-linkerd-2021-user-survey-is-here.md diff --git a/linkerd.io/.forestry/front_matter/templates/hero.yml b/linkerd.io/.forestry/front_matter/templates/hero.yml deleted file mode 100644 index 79e1ba3a81..0000000000 --- a/linkerd.io/.forestry/front_matter/templates/hero.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -label: Hero -hide_body: true -fields: -- type: text - name: title - label: Title - config: - required: false -- type: textarea - name: description - label: Description - config: - required: false - wysiwyg: true - schema: - format: markdown -- type: file - name: image - label: Image -- type: boolean - label: Image on the right - name: image_on_the_right -- type: include - name: buttons - label: Buttons - template: buttons diff --git a/linkerd.io/.forestry/front_matter/templates/related-content.yml b/linkerd.io/.forestry/front_matter/templates/related-content.yml deleted file mode 100644 index c627ffedd9..0000000000 --- a/linkerd.io/.forestry/front_matter/templates/related-content.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- -label: Related Content -hide_body: false -fields: -- type: list - name: items - label: Pinned pages - config: - min: '1' - use_select: true - source: - type: pages - section: '' - max: '4' - description: Pinned Content Pages diff --git 
a/linkerd.io/.forestry/front_matter/templates/schema-faq.yml b/linkerd.io/.forestry/front_matter/templates/schema-faq.yml deleted file mode 100644 index 710af0d3d8..0000000000 --- a/linkerd.io/.forestry/front_matter/templates/schema-faq.yml +++ /dev/null @@ -1,47 +0,0 @@ ---- -label: 'Schema: FAQ' -hide_body: true -fields: -- name: enableFAQSchema - type: boolean - label: Enable FAQ Schema -- name: schema_faq - type: field_group - config: {} - fields: - - name: faqs - type: field_group_list - fields: - - name: question - type: text - config: - required: true - label: Question - - name: answer - type: textarea - default: '' - config: - required: true - wysiwyg: false - schema: - format: markdown - label: Answer - - name: answer_schema - type: textarea - default: '' - config: - required: true - wysiwyg: false - schema: - format: markdown - label: Answer for Schema - description: Should be same as answer, without the links. - config: - min: - max: - labelField: - label: FAQs - label: 'Schema: FAQ' - showOnly: - field: enableFAQSchema - value: true diff --git a/linkerd.io/.forestry/front_matter/templates/schema-video.yml b/linkerd.io/.forestry/front_matter/templates/schema-video.yml deleted file mode 100644 index cc5a7a8121..0000000000 --- a/linkerd.io/.forestry/front_matter/templates/schema-video.yml +++ /dev/null @@ -1,62 +0,0 @@ ---- -label: 'Schema: Video' -hide_body: true -fields: -- name: enableVideoSchema - type: boolean - label: video object -- name: schema_video - type: field_group_list - config: - min: 1 - max: 3 - labelField: - fields: - - name: title - type: text - config: - required: true - label: Title - - name: description - type: textarea - default: '' - config: - required: true - wysiwyg: false - schema: - format: markdown - label: Description - - name: thumbnailUrl - type: text - config: - required: false - label: Thumbnail Url - default: https://i.ytimg.com/ - - name: embedUrl - type: text - config: - required: true - label: YouTube Embed Url - default: '' - description: https://www.youtube.com/watch?v=123 - - name: uploadDate - type: datetime - label: Upload Date - description: '' - config: - required: true - date_format: YYYY-MM-DD - time_format: - display_utc: false - export_format: YYYY-MM-DD - default: now - - name: duration - type: text - config: - required: false - label: Duration - default: PT3078S - label: 'Schema: Video' - showOnly: - field: enableVideoSchema - value: true diff --git a/linkerd.io/.forestry/settings.yml b/linkerd.io/.forestry/settings.yml deleted file mode 100644 index 5c6723a942..0000000000 --- a/linkerd.io/.forestry/settings.yml +++ /dev/null @@ -1,61 +0,0 @@ ---- -new_page_extension: md -auto_deploy: false -admin_path: '' -webhook_url: -sections: -- type: document - path: linkerd.io/data/authors.json - label: Authors -- type: directory - path: linkerd.io/content - label: Pages - create: all - match: "/[a-z]*/**/*" -- type: directory - path: linkerd.io/content/dashboard - label: Dashboard - create: all - match: "**/*" -- type: directory - path: linkerd.io/content/blog - label: Blog - create: all - match: "**/*" -- type: directory - path: linkerd.io/content/community - label: Community - create: all - match: "**/*" - templates: - - community -- type: directory - path: linkerd.io/content/heroes - label: Heroes - create: all - match: "**/*" -- type: directory - path: linkerd.io/content/2 - label: Linkerd 2 Docs - create: all - match: "**/*" -- type: directory - path: linkerd.io/content/1 - label: Linkerd 1 Docs - create: all - match: "**/*" 
-upload_dir: linkerd.io/static/uploads -public_path: "/uploads" -front_matter_path: '' -use_front_matter_path: false -file_template: ":filename:" -build: - preview_env: - - HUGO_ENV=staging - - HUGO_VERSION=0.55.6 - preview_output_directory: public - preview_docker_image: forestryio/hugo:latest - mount_path: "/srv" - instant_preview_command: hugo server -D -E -F --port 8080 --bind 0.0.0.0 --renderToDisk - -d public -version: 0.54.0 diff --git a/linkerd.io/.forestry/snippets/asciinema.snippet b/linkerd.io/.forestry/snippets/asciinema.snippet deleted file mode 100644 index d5de150f59..0000000000 --- a/linkerd.io/.forestry/snippets/asciinema.snippet +++ /dev/null @@ -1 +0,0 @@ -{{< asciinema VIDEO_ID >}} diff --git a/linkerd.io/.forestry/snippets/twitter.snippet b/linkerd.io/.forestry/snippets/twitter.snippet deleted file mode 100644 index 396f1004da..0000000000 --- a/linkerd.io/.forestry/snippets/twitter.snippet +++ /dev/null @@ -1 +0,0 @@ -{{< twitter TWEET_ID >}} diff --git a/linkerd.io/.forestry/snippets/youtube.snippet b/linkerd.io/.forestry/snippets/youtube.snippet deleted file mode 100644 index 292a89a64d..0000000000 --- a/linkerd.io/.forestry/snippets/youtube.snippet +++ /dev/null @@ -1 +0,0 @@ -{{< youtube id="VIDEO_ID" t="10" >}} diff --git a/linkerd.io/content/2-edge/tasks/distributed-tracing.md b/linkerd.io/content/2-edge/tasks/distributed-tracing.md index 3d673a2c3d..ad532d71d0 100644 --- a/linkerd.io/content/2-edge/tasks/distributed-tracing.md +++ b/linkerd.io/content/2-edge/tasks/distributed-tracing.md @@ -138,7 +138,8 @@ extension specifying the service exposing the Jaeger UI. By default, this would be something like this: ```bash -linkerd viz install --set jaegerUrl=jaeger.linkerd-jaeger:16686 +linkerd viz install --set jaegerUrl=jaeger.linkerd-jaeger:16686 \ + | kubectl apply -f - ``` ## Cleanup @@ -186,7 +187,8 @@ collector: exporters: jaeger: endpoint: my-jaeger-collector.my-jaeger-ns:14250 - insecure: true + tls: + insecure: true service: extensions: [health_check] pipelines: diff --git a/linkerd.io/content/2-edge/tasks/linkerd-smi.md b/linkerd.io/content/2-edge/tasks/linkerd-smi.md index faf469c8f9..875a5aab63 100644 --- a/linkerd.io/content/2-edge/tasks/linkerd-smi.md +++ b/linkerd.io/content/2-edge/tasks/linkerd-smi.md @@ -82,7 +82,7 @@ linkerd inject https://raw.githubusercontent.com/linkerd/linkerd2/main/test/inte ``` This installs a simple client, and two server deployments. -One of the server deployments i.e `faling-svc` always returns a 500 error, +One of the server deployments i.e `failing-svc` always returns a 500 error, and the other one i.e `backend-svc` always returns a 200. ```bash diff --git a/linkerd.io/content/2.10/tasks/linkerd-smi.md b/linkerd.io/content/2.10/tasks/linkerd-smi.md index a414907c66..69addae1bb 100644 --- a/linkerd.io/content/2.10/tasks/linkerd-smi.md +++ b/linkerd.io/content/2.10/tasks/linkerd-smi.md @@ -82,7 +82,7 @@ linkerd inject https://raw.githubusercontent.com/linkerd/linkerd2/main/test/inte ``` This installs a simple client, and two server deployments. -One of the server deployments i.e `faling-svc` always returns a 500 error, +One of the server deployments i.e `failing-svc` always returns a 500 error, and the other one i.e `backend-svc` always returns a 200. 
```bash diff --git a/linkerd.io/content/2.11/tasks/linkerd-smi.md b/linkerd.io/content/2.11/tasks/linkerd-smi.md index 700c5a3f8f..cf4041cc7f 100644 --- a/linkerd.io/content/2.11/tasks/linkerd-smi.md +++ b/linkerd.io/content/2.11/tasks/linkerd-smi.md @@ -82,7 +82,7 @@ linkerd inject https://raw.githubusercontent.com/linkerd/linkerd2/main/test/inte ``` This installs a simple client, and two server deployments. -One of the server deployments i.e `faling-svc` always returns a 500 error, +One of the server deployments i.e `failing-svc` always returns a 500 error, and the other one i.e `backend-svc` always returns a 200. ```bash diff --git a/linkerd.io/content/2.12/tasks/linkerd-smi.md b/linkerd.io/content/2.12/tasks/linkerd-smi.md index faf469c8f9..875a5aab63 100644 --- a/linkerd.io/content/2.12/tasks/linkerd-smi.md +++ b/linkerd.io/content/2.12/tasks/linkerd-smi.md @@ -82,7 +82,7 @@ linkerd inject https://raw.githubusercontent.com/linkerd/linkerd2/main/test/inte ``` This installs a simple client, and two server deployments. -One of the server deployments i.e `faling-svc` always returns a 500 error, +One of the server deployments i.e `failing-svc` always returns a 500 error, and the other one i.e `backend-svc` always returns a 200. ```bash diff --git a/linkerd.io/content/2.13/features/cni.md b/linkerd.io/content/2.13/features/cni.md index a70dd80fad..999e5443fb 100644 --- a/linkerd.io/content/2.13/features/cni.md +++ b/linkerd.io/content/2.13/features/cni.md @@ -9,10 +9,10 @@ every meshed pod to its proxy. (See the without the application being aware. By default, this rewiring is done with an [Init -Container](../../reference/architecture/#linkerd-init-container) that uses iptables -to install routing rules for the pod, at pod startup time. However, this requires -the `CAP_NET_ADMIN` capability; and in some clusters, this capability is not -granted to pods. +Container](../../reference/architecture/#linkerd-init-container) that uses +iptables to install routing rules for the pod, at pod startup time. However, +this requires the `CAP_NET_ADMIN` capability; and in some clusters, this +capability is not granted to pods. To handle this, Linkerd can optionally run these iptables rules in a [CNI plugin](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/network-plugins/) diff --git a/linkerd.io/content/2.13/features/http-grpc.md b/linkerd.io/content/2.13/features/http-grpc.md index 74ddd707ce..bb32e81916 100644 --- a/linkerd.io/content/2.13/features/http-grpc.md +++ b/linkerd.io/content/2.13/features/http-grpc.md @@ -4,18 +4,18 @@ description = "Linkerd will automatically enable advanced features (including me weight = 1 +++ -Linkerd can proxy all TCP connections, and will automatically enable advanced -features (including metrics, load balancing, retries, and more) for HTTP, -HTTP/2, and gRPC connections. (See -[TCP Proxying and Protocol Detection](../protocol-detection/) for details of how -this detection happens). +Linkerd can proxy all TCP connections. For HTTP connections (including HTTP/1.0, +HTTP/1.1, HTTP/2, and gRPC connections), it will automatically enable advanced +L7 features including [request-level metrics](../telemetry/), [latency-aware +load balancing](../load-balancing/), [retries](../retries-and-timeouts/), and +more. -## Notes +(See [TCP Proxying and Protocol Detection](../protocol-detection/) for details of +how this detection happens automatically, and how it can sometimes fail.) 
-* gRPC applications that use [grpc-go][grpc-go] must use version 1.3 or later due - to a [bug](https://github.com/grpc/grpc-go/issues/1120) in earlier versions. -* gRPC applications that use [@grpc/grpc-js][grpc-js] must use version 1.1.0 or later - due to a [bug](https://github.com/grpc/grpc-node/issues/1475) in earlier versions. - -[grpc-go]: https://github.com/grpc/grpc-go -[grpc-js]: https://github.com/grpc/grpc-node/tree/master/packages/grpc-js +Note that while Linkerd does [zero-config mutual TLS](../automatic-mtls/), it +cannot decrypt TLS connections initiated by the outside world. For example, if +you have a TLS connection from outside the cluster, or if your application does +HTTP/2 plus TLS, Linkerd will treat these connections as raw TCP streams. To +take advantage of Linkerd's full array of L7 features, communication between +meshed pods must be TLS'd by Linkerd, not by the application itself. diff --git a/linkerd.io/content/2.13/features/nft.md b/linkerd.io/content/2.13/features/nft.md index 502b7053fd..beb0a29504 100644 --- a/linkerd.io/content/2.13/features/nft.md +++ b/linkerd.io/content/2.13/features/nft.md @@ -1,6 +1,6 @@ +++ -title = "Proxy Init Iptables Modes" -description = "Linkerd's init container can run in two separate modes, nft or legacy." +title = "Iptables-nft Support" +description = "Linkerd's init container can use iptables-nft on systems that require it." +++ To transparently route TCP traffic through the proxy, without any awareness @@ -8,7 +8,7 @@ from the application, Linkerd will configure a set of [firewall rules](../../reference/iptables/) in each injected pod. Configuration can be done either through an [init container](../../reference/architecture/#linkerd-init-container) or through a -[CNI plugin](../cni/) +[CNI plugin](../cni/). Linkerd's init container can be run in two separate modes: `legacy` or `nft`. The difference between the two modes is what variant of `iptables` they will use @@ -26,7 +26,7 @@ two, is which binary they will call into: This is the default mode that `linkerd-init` runs in, and is supported by most operating systems and distributions. 2. `nft` mode will call into `iptables-nft`, which uses the newer `nf_tables` - kernel API. The [`nftables`] utilities are used by newer operating systems to + kernel API. The `nftables` utilities are used by newer operating systems to configure firewalls by default. [`iptables-legacy`]: https://manpages.debian.org/bullseye/iptables/iptables-legacy.8.en.html diff --git a/linkerd.io/content/2.13/features/request-routing.md b/linkerd.io/content/2.13/features/request-routing.md new file mode 100644 index 0000000000..6daef28b0f --- /dev/null +++ b/linkerd.io/content/2.13/features/request-routing.md @@ -0,0 +1,24 @@ ++++ +title = "Dynamic Request Routing" +description = "Linkerd can route individual HTTP requests based on their properties." ++++ + +Linkerd's dynamic request routing allows you to control routing of HTTP and gRPC +traffic based on properties of the request, including verb, method, query +parameters, and headers. For example, you can route all requests that match +a specific URL pattern to a given backend; or you can route traffic with a +particular header to a different service. + +This is an example of _client-side policy_, i.e. ways to dynamically configure +Linkerd's behavior when it is sending requests from a meshed pod. + +Dynamic request routing is built on Kubernetes's Gateway API types, especially +[HTTPRoute](https://gateway-api.sigs.k8s.io/api-types/httproute/). 
+ +This feature extends Linkerd's traffic routing capabilities beyond those of +[traffic splits](../traffic-split/), which only provide percentage-based +splits. + +## Learning more + +- [Guide to configuring routing policy](../../tasks/configuring-dynamic-request-routing/) diff --git a/linkerd.io/content/2.13/features/server-policy.md b/linkerd.io/content/2.13/features/server-policy.md index 2bad7ae96b..eb688edc33 100644 --- a/linkerd.io/content/2.13/features/server-policy.md +++ b/linkerd.io/content/2.13/features/server-policy.md @@ -130,5 +130,5 @@ result in an abrupt termination of those connections. ## Learning more -- [Policy reference](../../reference/authorization-policy/) +- [Authorization policy reference](../../reference/authorization-policy/) - [Guide to configuring per-route policy](../../tasks/configuring-per-route-policy/) diff --git a/linkerd.io/content/2.13/features/traffic-split.md b/linkerd.io/content/2.13/features/traffic-split.md index c6b9582eb9..725bbce8e3 100644 --- a/linkerd.io/content/2.13/features/traffic-split.md +++ b/linkerd.io/content/2.13/features/traffic-split.md @@ -13,8 +13,14 @@ for example, by slowly easing traffic off of an older version of a service and onto a newer version. {{< note >}} -If working with headless services, traffic splits cannot be retrieved. Linkerd -reads service discovery information based off the target IP address, and if that +This feature will eventually be supplanted by the newer [dynamic request +routing](../request-routing/) capabilities, which does not require the SMI +extension. +{{< /note >}} + +{{< note >}} +TrafficSplits cannot be used with headless services. Linkerd reads +service discovery information based off the target IP address, and if that happens to be a pod IP address then it cannot tell which service the pod belongs to. {{< /note >}} diff --git a/linkerd.io/content/2.13/tasks/configuring-dynamic-request-routing.md b/linkerd.io/content/2.13/tasks/configuring-dynamic-request-routing.md index 319f5aa749..ec7d32ed47 100644 --- a/linkerd.io/content/2.13/tasks/configuring-dynamic-request-routing.md +++ b/linkerd.io/content/2.13/tasks/configuring-dynamic-request-routing.md @@ -24,7 +24,7 @@ request routing, by deploying in the cluster two backend and one frontend podinfo pods. Traffic will flow to just one backend, and then we'll switch traffic to the other one just by adding a header to the frontend requests. -## Set Up +## Setup First we create the `test` namespace, annotated by linkerd so all pods that get created there get injected with the linkerd proxy: diff --git a/linkerd.io/content/2.13/tasks/configuring-per-route-policy.md b/linkerd.io/content/2.13/tasks/configuring-per-route-policy.md index 881126f092..e724da6418 100644 --- a/linkerd.io/content/2.13/tasks/configuring-per-route-policy.md +++ b/linkerd.io/content/2.13/tasks/configuring-per-route-policy.md @@ -1,12 +1,12 @@ +++ -title = "Configuring Per-Route Policy" +title = "Configuring Fine-grained Authorization Policy" description = "Fine-grained authorization policies can be configured for individual HTTP routes." aliases = [] +++ -In addition to [enforcing authorization policies at the service +In addition to [enforcing authorization at the service level](../restricting-access/), finer-grained authorization policies can also be configured for individual HTTP routes. 
In this example, we'll use the Books demo app to demonstrate how to control which clients can access particular routes on @@ -16,7 +16,7 @@ This is an advanced example that demonstrates more complex policy configuration. For a basic introduction to Linkerd authorization policy, start with the [Restricting Access to Services](../restricting-access/) example. For more comprehensive documentation of the policy resources, see the -[Policy reference docs](../../reference/authorization-policy/). +[Authorization policy reference](../../reference/authorization-policy/). ## Prerequisites diff --git a/linkerd.io/content/2.13/tasks/distributed-tracing.md b/linkerd.io/content/2.13/tasks/distributed-tracing.md index 3d673a2c3d..ad532d71d0 100644 --- a/linkerd.io/content/2.13/tasks/distributed-tracing.md +++ b/linkerd.io/content/2.13/tasks/distributed-tracing.md @@ -138,7 +138,8 @@ extension specifying the service exposing the Jaeger UI. By default, this would be something like this: ```bash -linkerd viz install --set jaegerUrl=jaeger.linkerd-jaeger:16686 +linkerd viz install --set jaegerUrl=jaeger.linkerd-jaeger:16686 \ + | kubectl apply -f - ``` ## Cleanup @@ -186,7 +187,8 @@ collector: exporters: jaeger: endpoint: my-jaeger-collector.my-jaeger-ns:14250 - insecure: true + tls: + insecure: true service: extensions: [health_check] pipelines: diff --git a/linkerd.io/content/2.13/tasks/linkerd-smi.md b/linkerd.io/content/2.13/tasks/linkerd-smi.md index faf469c8f9..875a5aab63 100644 --- a/linkerd.io/content/2.13/tasks/linkerd-smi.md +++ b/linkerd.io/content/2.13/tasks/linkerd-smi.md @@ -82,7 +82,7 @@ linkerd inject https://raw.githubusercontent.com/linkerd/linkerd2/main/test/inte ``` This installs a simple client, and two server deployments. -One of the server deployments i.e `faling-svc` always returns a 500 error, +One of the server deployments i.e `failing-svc` always returns a 500 error, and the other one i.e `backend-svc` always returns a 200. ```bash diff --git a/linkerd.io/content/2.13/tasks/using-ingress.md b/linkerd.io/content/2.13/tasks/using-ingress.md index 73a5c65fce..667371e65e 100644 --- a/linkerd.io/content/2.13/tasks/using-ingress.md +++ b/linkerd.io/content/2.13/tasks/using-ingress.md @@ -1,38 +1,71 @@ +++ -title = "Ingress traffic" -description = "Linkerd works alongside your ingress controller of choice." +title = "Handling ingress traffic" +description = "Linkerd can work alongside your ingress controller of choice." +++ -For reasons of simplicity and composability, Linkerd doesn't provide a built-in -ingress. Instead, Linkerd is designed to work with existing Kubernetes ingress -solutions. +Ingress traffic refers to traffic that comes into your cluster from outside the +cluster. For reasons of simplicity and composability, Linkerd itself doesn't +provide a built-in ingress solution for handling traffic coming into the +cluster. Instead, Linkerd is designed to work with the many existing Kubernetes +ingress options. -Combining Linkerd and your ingress solution requires two things: +Combining Linkerd and your ingress solution of choice requires two things: -1. Configuring your ingress to support Linkerd. -2. Meshing your ingress pods so that they have the Linkerd proxy installed. +1. Configuring your ingress to support Linkerd (if necessary). +2. Meshing your ingress pods. -Meshing your ingress pods will allow Linkerd to provide features like L7 -metrics and mTLS the moment the traffic is inside the cluster. 
(See -[Adding your service](../adding-your-service/) for instructions on how to mesh -your ingress.) +Strictly speaking, meshing your ingress pods is not required to allow traffic +into the cluster. However, it is recommended, as it allows Linkerd to provide +features like L7 metrics and mutual TLS the moment the traffic enters the +cluster. -Note that, as explained below, some ingress options need to be meshed in -"ingress" mode, which means injecting with the `linkerd.io/inject: ingress` -annotation rather than the default `enabled`. It's possible to use this -annotation at the namespace level, but it's recommended to do it at the -individual workload level instead. The reason is that many ingress -implementations also place other types of workloads under the same namespace for -tasks other than routing and therefore you'd rather inject them using the -default `enabled` mode (or some you wouldn't want to inject at all, such as -Jobs). +## Handling external TLS + +One common job for ingress controllers is to terminate TLS from the outside +world, e.g. HTTPS calls. + +Like all pods, traffic to a meshed ingress has both an inbound and an outbound +component. If your ingress terminates TLS, Linkerd will treat this inbound TLS +traffic as an opaque TCP stream, and will only be able to provide byte-level +metrics for this side of the connection. + +Once the ingress controller terminates the TLS connection and issues the +corresponding HTTP or gRPC traffic to internal services, these outbound calls +will have the full set of metrics and mTLS support. + +## Ingress mode {#ingress-mode} + +Most ingress controllers can be meshed like any other service, i.e. by +applying the `linkerd.io/inject: enabled` annotation at the appropriate level. +(See [Adding your services to Linkerd](../adding-your-service/) for more.) + +However, some ingress options need to be meshed in a special "ingress" mode, +using the `linkerd.io/inject: ingress` annotation. + +The instructions below will describe, for each ingress, whether it requires this +mode of operation. + +If you're using "ingress" mode, we recommend that you set this ingress +annotation at the workload level rather than at the namespace level, so that +other resources in the ingress namespace can be meshed normally. {{< warning id=open-relay-warning >}} -When an ingress is meshed in `ingress` mode by using `linkerd.io/inject: -ingress`, the ingress _must_ be configured to remove the `l5d-dst-override` -header to avoid creating an open relay to cluster-local and external endpoints. +When an ingress is meshed in ingress mode, you _must_ configure it to remove +the `l5d-dst-override` header to avoid creating an open relay to cluster-local +and external endpoints. {{< /warning >}} +{{< note >}} +Linkerd versions 2.13.0 through 2.13.4 had a bug whereby the `l5d-dst-override` +header was *required* in ingress mode, or the request would fail. This bug was +fixed in 2.13.5, and was not present prior to 2.13.0. +{{< /note >}} + +For more on ingress mode and why it's necessary, see [Ingress +details](#ingress-details) below. + +## Common ingress options for Linkerd + Common ingress options that Linkerd has been used with include: - [Ambassador (aka Emissary)](#ambassador) @@ -46,23 +79,15 @@ Common ingress options that Linkerd has been used with include: - [Kong](#kong) - [Haproxy](#haproxy) - [EnRoute](#enroute) -- [Ingress details](#ingress-details) For a quick start guide to using a particular ingress, please visit the section -for that ingress.
If your ingress is not on that list, never fear—it likely -works anyways. See [Ingress details](#ingress-details) below. - -{{< note >}} -If your ingress terminates TLS, this TLS traffic (e.g. HTTPS calls from outside -the cluster) will pass through Linkerd as an opaque TCP stream and Linkerd will -only be able to provide byte-level metrics for this side of the connection. The -resulting HTTP or gRPC traffic to internal services, of course, will have the -full set of metrics and mTLS support. -{{< /note >}} +for that ingress below. If your ingress is not on that list, never fear—it +likely works anyways. See [Ingress details](#ingress-details) below. -## Ambassador (aka Emissary) {#ambassador} +## Emissary-Ingress (aka Ambassador) {#ambassador} -Ambassador can be meshed normally. An example manifest for configuring the +Emissary-Ingress can be meshed normally: it does not require the [ingress +mode](#ingress-mode) annotation. An example manifest for configuring Ambassador / Emissary is as follows: ```yaml @@ -77,15 +102,18 @@ spec: service: http://web-svc.emojivoto.svc.cluster.local:80 ``` -For a more detailed guide, we recommend reading [Installing the Emissary -ingress with the Linkerd service +For a more detailed guide, we recommend reading [Installing the Emissary ingress +with the Linkerd service mesh](https://buoyant.io/2021/05/24/emissary-and-linkerd-the-best-of-both-worlds/). ## Nginx -Nginx can be meshed normally, but the +Nginx can be meshed normally: it does not require the [ingress +mode](#ingress-mode) annotation. + +The [`nginx.ingress.kubernetes.io/service-upstream`](https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/annotations/#service-upstream) -annotation should be set to `"true"`. +annotation should be set to `"true"`. For example: ```yaml # apiVersion: networking.k8s.io/v1beta1 # for k8s < v1.19 @@ -105,13 +133,11 @@ spec: number: 80 ``` -If using [this Helm chart](https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx), -note the following. - -The `namespace` containing the ingress controller (when using the above -Helm chart) should NOT be annotated with `linkerd.io/inject: enabled`. -Rather, annotate the `kind: Deployment` (`.spec.template.metadata.annotations`) -of the Nginx by setting `values.yaml` like this: +If using [the ingress-nginx Helm +chart](https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx), note +that the namespace containing the ingress controller should NOT be annotated +with `linkerd.io/inject: enabled`. Instead, you should annotate the `kind: +Deployment` (`.spec.template.metadata.annotations`). For example: ```yaml controller: @@ -120,37 +146,25 @@ controller: ... ``` -The reason is as follows. - -That Helm chart defines (among other things) two Kubernetes resources: +The reason is because this Helm chart defines (among other things) two +Kubernetes resources: 1) `kind: ValidatingWebhookConfiguration`. This creates a short-lived pod named - something like `ingress-nginx-admission-create-t7b77` which terminates in 1 - or 2 seconds. + something like `ingress-nginx-admission-create-XXXXX` which quickly terminates. 2) `kind: Deployment`. This creates a long-running pod named something like -`ingress-nginx-controller-644cc665c9-5zmrp` which contains the Nginx docker +`ingress-nginx-controller-XXXX` which contains the Nginx docker container. -However, had we set `linkerd.io/inject: enabled` at the `namespace` level, -a long-running sidecar would be injected into the otherwise short-lived -pod in (1). 
This long-running sidecar would prevent the pod as a whole from -terminating naturally (by design a few seconds after creation) even if the -original base admission container had terminated. - -Without (1) being considered "done", the creation of (2) would wait forever -in an infinite timeout loop. - -The above analysis only applies to that particular Helm chart. Other charts -may have a different behaviour and different file structure for `values.yaml`. -Be sure to check the nginx chart that you are using to set the annotation -appropriately, if necessary. +Setting the injection annotation at the namespace level would mesh the +short-lived pod, which would prevent it from terminating as designed. ## Traefik -Traefik should be meshed with ingress mode enabled([*](#open-relay-warning)), -i.e. with the `linkerd.io/inject: ingress` annotation rather than the default -`enabled`. Instructions differ for 1.x and 2.x versions of Traefik. +Traefik should be meshed with [ingress mode enabled](#ingress-mode), i.e. with +the `linkerd.io/inject: ingress` annotation rather than the default `enabled`. + +Instructions differ for 1.x and 2.x versions of Traefik. ### Traefik 1.x {#traefik-1x} @@ -263,8 +277,8 @@ spec: ## GCE -The GCE ingress should be meshed with ingress mode -enabled([*](#open-relay-warning)), i.e. with the `linkerd.io/inject: ingress` +The GCE ingress should be meshed with [ingress mode +enabled](#ingress-mode), i.e. with the `linkerd.io/inject: ingress` annotation rather than the default `enabled`. This example shows how to use a [Google Cloud Static External IP Address](https://cloud.google.com/compute/docs/ip-addresses/reserve-static-external-ip-address) @@ -308,9 +322,8 @@ certificate is provisioned, the ingress should be visible to the Internet. ## Gloo -Gloo should be meshed with ingress mode enabled([*](#open-relay-warning)), i.e. -with the `linkerd.io/inject: ingress` annotation rather than the default -`enabled`. +Gloo should be meshed with [ingress mode enabled](#ingress-mode), i.e. with the +`linkerd.io/inject: ingress` annotation rather than the default `enabled`. As of Gloo v0.13.20, Gloo has native integration with Linkerd, so that the required Linkerd headers are added automatically. Assuming you installed Gloo @@ -332,9 +345,8 @@ glooctl add route --path-prefix=/ --dest-name booksapp-webapp-7000 ## Contour -Contour should be meshed with ingress mode enabled([*](#open-relay-warning)), -i.e. with the `linkerd.io/inject: ingress` annotation rather than the default -`enabled`. +Contour should be meshed with [ingress mode enabled](#ingress-mode), i.e. with +the `linkerd.io/inject: ingress` annotation rather than the default `enabled`. The following example uses the [Contour getting started](https://projectcontour.io/getting-started/) documentation @@ -424,9 +436,8 @@ the `l5d-dst-override` headers will be set automatically. ### Kong -Kong should be meshed with ingress mode enabled([*](#open-relay-warning)), i.e. -with the `linkerd.io/inject: ingress` annotation rather than the default -`enabled`. +Kong should be meshed with [ingress mode enabled](#ingress-mode), i.e. with the +`linkerd.io/inject: ingress` annotation rather than the default `enabled`. This example will use the following elements: @@ -513,9 +524,8 @@ haproxytech](https://www.haproxy.com/documentation/kubernetes/latest/) and not the [haproxy-ingress controller](https://haproxy-ingress.github.io/). {{< /note >}} -Haproxy should be meshed with ingress mode enabled([*](#open-relay-warning)), -i.e. with the `linkerd.io/inject: ingress` annotation rather than the default -`enabled`.
+Haproxy should be meshed with [ingress mode enabled](#ingress-mode), i.e. with +the `linkerd.io/inject: ingress` annotation rather than the default `enabled`. The simplest way to use Haproxy as an ingress for Linkerd is to configure a Kubernetes `Ingress` resource with the @@ -553,8 +563,7 @@ in an ingress manifest as each one needs their own ## EnRoute OneStep {#enroute} -Meshing EnRoute with linkerd involves only setting one -flag globally: +Meshing EnRoute with Linkerd involves only setting one flag globally: ```yaml apiVersion: enroute.saaras.io/v1 @@ -574,14 +583,14 @@ spec: ``` EnRoute can now be meshed by injecting Linkerd proxy in EnRoute pods. -Using the ```linkerd``` utility, we can update the EnRoute deployment +Using the `linkerd` utility, we can update the EnRoute deployment to inject Linkerd proxy. ```bash kubectl get -n enroute-demo deploy -o yaml | linkerd inject - | kubectl apply -f - ``` -The ```linkerd_enabled``` flag automatically sets `l5d-dst-override` header. +The `linkerd_enabled` flag automatically sets `l5d-dst-override` header. The flag also delegates endpoint selection for routing to linkerd. More details and customization can be found in, @@ -593,22 +602,22 @@ Linkerd](https://getenroute.io/blog/end-to-end-encryption-mtls-linkerd-enroute/) In this section we cover how Linkerd interacts with ingress controllers in general. -In general, Linkerd can be used with any ingress controller. In order for -Linkerd to properly apply features such as route-based metrics and traffic -splitting, Linkerd needs the IP/port of the Kubernetes Service. However, by -default, many ingresses do their own endpoint selection and pass the IP/port of -the destination Pod, rather than the Service as a whole. +In order for Linkerd to properly apply L7 features such as route-based metrics +and dynamic traffic routing, Linkerd needs the ingress controller to connect +to the IP/port of the destination Kubernetes Service. However, by default, +many ingresses do their own endpoint selection and connect directly to the +IP/port of the destination Pod, rather than the Service. Thus, combining an ingress with Linkerd takes one of two forms: -1. Configure the ingress to pass the IP and port of the Service as the +1. Configure the ingress to connect to the IP and port of the Service as the destination, i.e. to skip its own endpoint selection. (E.g. see [Nginx](#nginx) above.) -2. If this is not possible, then configure the ingress to pass the Service - IP/port in a header such as `l5d-dst-override`, `Host`, or `:authority`, and - configure Linkerd in *ingress* mode. In this mode, it will read from one of - those headers instead. +2. Alternatively, configure the ingress to pass the Service IP/port in a + header such as `l5d-dst-override`, `Host`, or `:authority`, and configure + Linkerd in *ingress* mode. In this mode, it will read from one of those + headers instead. The most common approach in form #2 is to use the explicit `l5d-dst-override` header. 
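To make form #2 concrete, here is a rough sketch of what setting that header can look like, using a Traefik v2 Middleware purely as an illustration; the middleware name, namespace, Service name, and port are placeholders (borrowed from the Emissary example above) rather than anything this page prescribes:

```yaml
# Illustrative only: a Traefik v2 Middleware that adds the l5d-dst-override
# header. The middleware name, namespace, Service name, and port are all
# placeholders -- substitute the Service your ingress actually routes to.
apiVersion: traefik.containo.us/v1alpha1
kind: Middleware
metadata:
  name: l5d-header-middleware
  namespace: emojivoto
spec:
  headers:
    customRequestHeaders:
      l5d-dst-override: "web-svc.emojivoto.svc.cluster.local:80"
```

Whatever mechanism your ingress provides, the key point is the same: the header value must be the fully qualified name of the Service plus its port, so that Linkerd in ingress mode routes to the Service rather than to an individual pod.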
diff --git a/linkerd.io/content/_index.md b/linkerd.io/content/_index.md index 4b95e4f8dc..006f8aa659 100644 --- a/linkerd.io/content/_index.md +++ b/linkerd.io/content/_index.md @@ -68,8 +68,8 @@ companies: link: https://www.webex.com/ - image: "/uploads/logos/blue/clover-health.png" link: https://www.cloverhealth.com/ -- image: "/uploads/logos/blue/godaddy.png" - link: https://godaddy.com/ +- image: "/uploads/logos/blue/docker.png" + link: https://www.docker.com/ - image: "/uploads/logos/blue/heb.png" link: https://www.heb.com/ - image: "/uploads/logos/blue/walmart.svg" @@ -98,7 +98,7 @@ foundation_member_banner: cta_buoyant: image: "/uploads/buoyant-logo-blue.png" textcreated: "Linkerd was created by" - caption: "Learn more about Buoyant" + caption: "Learn more about Buoyant's Linkerd offerings" url: "https://buoyant.io/" --- diff --git a/linkerd.io/content/adopters/_index.md b/linkerd.io/content/adopters/_index.md index d5e2feccd3..783c957112 100644 --- a/linkerd.io/content/adopters/_index.md +++ b/linkerd.io/content/adopters/_index.md @@ -8,6 +8,9 @@ case_studies: - image: /images/adopters/adidasb.png alt: Adidas link: https://buoyant.io/case-studies/adidas +- image: /images/adopters/db-schenker.png + alt: DB Schenker + link: https://buoyant.io/case-studies/schenker/ - image: /images/adopters/penn-state.png alt: Penn State link: https://buoyant.io/case-studies/penn-state/ diff --git a/linkerd.io/content/blog/2023/0130-mtls-and-linkerd.md b/linkerd.io/content/blog/2023/0130-mtls-and-linkerd.md index 868a958183..bb564bf82c 100644 --- a/linkerd.io/content/blog/2023/0130-mtls-and-linkerd.md +++ b/linkerd.io/content/blog/2023/0130-mtls-and-linkerd.md @@ -1,5 +1,5 @@ --- -title: "mTLS and Linkerd" +title: "Workshop recap: A deep dive into Kubernetes mTLS with Linkerd" author: 'flynn' date: 2023-01-30T00:00:00+00:00 slug: mtls-and-linkerd @@ -20,8 +20,6 @@ _This blog post is based on a workshop I recently delivered at Buoyant’s interesting, check out the [full recording](https://buoyant.io/service-mesh-academy/kubernetes-mtls-with-linkerd)!_ -## mTLS and Linkerd - You don’t have to spend much time in the cloud-native world before [mTLS](https://buoyant.io/mtls-guide) comes up. It shows up over and over again, especially once you start talking about diff --git a/linkerd.io/content/blog/2023/0221-linkerd-and-ingress.md b/linkerd.io/content/blog/2023/0221-linkerd-and-ingress.md index d88e69cba2..1e43cb8aae 100644 --- a/linkerd.io/content/blog/2023/0221-linkerd-and-ingress.md +++ b/linkerd.io/content/blog/2023/0221-linkerd-and-ingress.md @@ -2,7 +2,7 @@ author: 'flynn' date: 2023-02-21T00:00:00Z title: |- - Linkerd and Ingress Controllers: Bringing the Outside World In + Workshop recap: Linkerd and Ingress Controllers: Bringing the Outside World In url: /2023/02/21/linkerd-and-ingress/ thumbnail: '/uploads/2023/02/door-battaglia-9drS5E_Rguc-square.jpg' @@ -81,12 +81,12 @@ still inject it into the mesh, it will still get automatic mTLS and metrics from Linkerd, and all the usual Linkerd features will still work. The one way that it _is_ likely to be different from other workloads is that -you'll probably want to tell Linkerd to skip the incoming ports for the +you'll probably want to tell Linkerd to skip the inbound ports for the ingress controller. If you don't do this, the ingress controller won't be able to see the IP address of incoming connections: every connection will appear to originate with the Linkerd proxy. 
-To skip incoming ports, use the `config.linkerd.io/skip-incoming-ports` +To skip inbound ports, use the `config.linkerd.io/skip-inbound-ports` annotation. Note that you need to use the port on which the ingress controller is listening, not the port that the client will see! So, for example, if your ingress controller is behind a Service like @@ -105,8 +105,8 @@ spec: targetPort: 8080 ``` -then you would need to use `config.linkerd.io/skip-incoming-ports: 8080`  – -trying to skip incoming port 80 wouldn't do anything. +then you would need to use `config.linkerd.io/skip-inbound-ports: 8080`  – +trying to skip inbound port 80 wouldn't do anything. ### Linkerd Is (Mostly) Invisible @@ -151,7 +151,7 @@ basically Just Works. [Install Emissary](https://www.getambassador.io/docs/emissary/latest/tutorials/getting-started), inject it into the mesh, and... you're done. Since Emissary defaults to routing to Services, there's nothing special to do there. About the only thing -to consider is that you'll need to be sure to skip Emissary's incoming ports +to consider is that you'll need to be sure to skip Emissary's inbound ports if you want Emissary to pay attention to client IP addresses. ### NGINX @@ -173,7 +173,7 @@ nginx.ingress.kubernetes.io/service-upstream: "true" ``` After that's done, installing and meshing `ingress-nginx` should be -straightforward. Again, you'll probably want to skip incoming ports, too. +straightforward. Again, you'll probably want to skip inbound ports, too. ### Envoy Gateway @@ -204,7 +204,7 @@ changes, its data-plane Deployment is restarted. These ephemeral Deployments can be challenging to inject into the Linkerd mesh: the most effective way to do it is to put the `linker.io/inject` annotation on the `envoy-gateway-system` Namespace, since that's where the ephemeral Deployments -are created. (You can put the `config.linkerd.io/skip-incoming-ports` +are created. (You can put the `config.linkerd.io/skip-inbound-ports` annotation there too.) Once you know about that, Envoy Gateway works just fine with Linkerd. 
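For illustration, a namespace annotated this way might look like the following sketch; the `8080` value is only a placeholder for whichever inbound port your Envoy Gateway data plane actually listens on:

```yaml
# Sketch of annotating the envoy-gateway-system Namespace so that the
# ephemeral data-plane Deployments are injected automatically. The
# skip-inbound-ports value is a placeholder for your listener port.
apiVersion: v1
kind: Namespace
metadata:
  name: envoy-gateway-system
  annotations:
    linkerd.io/inject: enabled
    config.linkerd.io/skip-inbound-ports: "8080"
```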
diff --git a/linkerd.io/content/blog/2023/0515-real-world-gitops.md b/linkerd.io/content/blog/2023/0515-real-world-gitops.md index 6b1773f55b..44f9ea799a 100644 --- a/linkerd.io/content/blog/2023/0515-real-world-gitops.md +++ b/linkerd.io/content/blog/2023/0515-real-world-gitops.md @@ -2,7 +2,7 @@ author: 'flynn' date: 2023-05-15T00:00:00Z title: |- - Real-World GitOps with Flux, Flagger, and Linkerd + Workshop recap: Real-World GitOps with Flux, Flagger, and Linkerd url: /2023/05/15/real-world-gitops/ thumbnail: '/uploads/2023/05/gitops-square.png' diff --git a/linkerd.io/content/blog/2023/0613-dynamic-request-routing-circuit-breaking.md b/linkerd.io/content/blog/2023/0613-dynamic-request-routing-circuit-breaking.md new file mode 100644 index 0000000000..ae27d66901 --- /dev/null +++ b/linkerd.io/content/blog/2023/0613-dynamic-request-routing-circuit-breaking.md @@ -0,0 +1,290 @@ +--- +author: 'flynn' +date: 2023-06-13T00:00:00Z +title: |- + Workshop recap: Dynamic Request Routing and Circuit Breaking +url: + /2023/06/13/dynamic-request-routing-circuit-breaking/ +thumbnail: '/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-square.jpg' +featuredImage: '/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-rect.jpg' +tags: [Linkerd, linkerd, gitops, flux, flagger] +featured: false +--- + +{{< fig + alt="Dynamic Request Routing" + title="image credit: [Denys Nevozhai](https://unsplash.com/@dnevozhai?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)" + src="/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-rect.jpg" >}} + +_This blog post is based on a workshop that I recently delivered at Buoyant’s +[Service Mesh Academy](https://buoyant.io/service-mesh-academy). If this seems +interesting, check out the [full +recording](https://buoyant.io/service-mesh-academy/circuit-breaking-and-dynamic-routing-deep-dive/)!_ + +Linkerd 2.13 adds two long-requested features to Linkerd: _dynamic request +routing_ and _circuit breaking_. + +- Dynamic request routing permits HTTP routing based on headers, HTTP method, + etc. + +- Circuit breaking is a resilience feature that allows Linkerd to stop sending + requests to endpoints that fail too much. + +While Linkerd 2.12 has been able to do some dynamic request routing, Linkerd +2.13 expands quite a bit on the feature. Circuit breaking is completely new in +Linkerd 2.13. + +## Dynamic Request Routing + +In Linkerd 2.11 and earlier, the only mechanism for any sort of dynamic +routing used the TrafficSplit extension and the `linkerd-smi` extension to +support a coarse-grained routing behavior based on the service name and the +desired percentage of traffic to be split. For example: + +- Progressive delivery: 1% of the requests to the `foo` workload are sent to a + new version (`foo-new`), while the remaining 99% continue to be routed to + the original version. If all goes well, the percentages are shifted over + time until all the requests are going to `foo-new`. + +- Multi-cluster/failover: All of the requests to the `foo` workload get routed + to a different cluster via a mirrored `foo-west` Service. + +Linkerd 2.12 introduced support for basic header-based routing, using the +HTTPRoute CRD from the Gateway API. This allowed for routing based on the +value of a header, but it didn't support weighted routing at the same time. 
+ +Dynamic request routing in Linkerd 2.13 brings these two worlds together using +the HTTPRoute CRD, and expands it further by supporting weighted routing based +on request headers, verbs, or other attributes of the request (though not the +request body). This is much more powerful than what was possible with 2.12 and +earlier. For example: + +- Progressive delivery is possible without using the `linkerd-smi` extension + at all. + +- Progressive delivery can be combined with header-based routing, for example + per-user canaries: use a header to select a particular group of users, then + canary only that group of users using a new version of a workload. This + enables early rollout of a new feature only for a specific group of users, + while most users continue to use the stable version. + +- A/B testing anywhere in the call graph: Since dynamic request routing + permits separating traffic based on headers or verbs, it's possible to split + users into multiple groups and route each group to a distinct version of a + workload. This allows for experimentation and comparison of different + implementations or features. + +### Sidebar: Dynamic Request Routing and the Gateway API + +The [Gateway API] is a Kubernetes SIG-Networking project started in 2020, +primarily to address the challenges related to the proliferation of +annotations in use on the Ingress resource. In 2022, the Gateway API project +began the GAMMA (Gateway API for Mesh Management and Administration) +initiative to explore how to use the Gateway API for mesh networking. Linkerd +is an active participant in both efforts: the power and flexibility of the +Gateway API makes it easier to expand Linkerd's capabilities while maintaining +its overall best-in-class operational simplicity. + +One important caveat, though, is that since the Gateway API was originally +designed to manage ingress traffic - traffic from outside the cluster coming +in - its conformance tests are not yet well-suited to service meshes, so +Linkerd can't yet be fully conformant with the Gateway API. For this reason, +Linkerd uses the HTTPRoute resource in the `policy.linkerd.io` APIGroup, +rather than the official Gateway API APIgroup. There's work actively underway +to improve this situation. + +[Gateway API]: https://gateway-api.sigs.k8s.io/ + +### Dynamic Request Routing Examples + +First, a simple canary example. This example does a 50/50 split for requests +to the `color` Service, routing half to the endpoints being the actual `color` +Service and half to those behind the `color2` Service. + +```yaml +apiVersion: policy.linkerd.io/v1beta2 +kind: HTTPRoute +metadata: + name: color-canary + namespace: faces +spec: + parentRefs: + - name: color + kind: Service + group: core + port: 80 # Match port numbers with what’s in the Service resource + rules: + - backendRefs: + - name: color + port: 80 + weight: 50 # Adjust the weights to control balancing + - backendRefs: + - name: color2 + port: 80 + weight: 50 +``` + +I'm being careful here about the distinction between a Service and the +endpoints behind the Service, because the HTTPRoute acts on _requests sent to +a particular service_, routing them to _endpoints behind a service_. This is +why having `color` in the `parentRefs` stanza and also in one of the +`backendRefs` stanzas works, without creating a loop. + +Here's an example of A/B testing. 
Here, requests sent to the `smiley` Service +with the header + +```text +X-Faces-User: testuser +``` + +get routed to endpoints behind the `smiley2` Service, while other requests +continue on to endpoints behind the `smiley` Service. + +```yaml +apiVersion: policy.linkerd.io/v1beta2 +kind: HTTPRoute +metadata: + name: smiley-a-b + namespace: faces +spec: + parentRefs: + - name: smiley + kind: Service + group: core + port: 80 + rules: + - matches: + - headers: + - name: "x-faces-user" # X-Faces-User: testuser goes to smiley2 + value: "testuser" + backendRefs: + - name: smiley2 + port: 80 + - backendRefs: + - name: smiley + port: 80 +``` + +One critical point about the A/B test: Linkerd can do dynamic request routing +anywhere, but of course if you want to route on a header, you need to make +sure that header is present at the place you want to use it for routing! This +may mean that you need to be careful to propagate headers through the various +workloads of your application. + +You can find more details about dynamic request routing in its documentation, +at . + +## Circuit Breaking + +Circuit breaking is new to Linkerd 2.13, but it's been long requested by +users. It's a mechanism to try to avoid overwhelming a failing workload +endpoint with additional traffic: + +- A workload endpoint starts to fail. +- Linkerd detects failures from the endpoint and temporarily stops routing + requests to that endpoint (_opening_ the breaker). +- After a little while, a test request is sent. +- If the test succeeds, the circuit breaker is _closed_ again, allowing + requests to resume being delivered. + +In Linkerd 2.13, circuit breaking is a little limited: + +- Circuit breakers can only be opened when a certain number of consecutive + failures occur. +- "Failure" means an HTTP 5xx response; Linkerd doesn't currently support + response classification for circuit breakers. +- Circuit breakers are configured through annotations on a Service, with all + the relevant annotations containing the term "failure-accrual" in their + names (from the internal name for circuit breaking in the code). + +Circuit breakers in Linkerd are expected to gain functionality rapidly, so +keep an eye out as new releases happen (and the annotation approach should be +supplanted with Gateway API CRDs). + +## Circuit Breaking Example + +To break the circuit after four consecutive request failures, apply these +annotations to a Service: + +```text +balancer.linkerd.io/failure-accrual: consecutive +balancer.linkerd.io/failure-accrual-consecutive-max-failures: 4 +``` + +The `failure-accrual: consecutive` annotation switches on circuit breaking, +and sets it to the "consecutive failure" mode (which is the only supported +mode in 2.13). + +All configuration for the "consecutive failure" mode of circuit breaking uses +annotations that start with `failure-accrual-consecutive-`; the +`failure-accrual-consecutive-max-failures` annotation sets the number of +consecutive failures after which the circuit breaker will open. + +Try reenabling traffic after 30 seconds: + +```text +balancer.linkerd.io/failure-accrual-consecutive-min-penalty: 30s +``` + +(This is for the first attempt. After that, the delay grows exponentially.) + +Don’t ever wait more than 120 seconds between retries: + +```text +balancer.linkerd.io/failure-accrual-consecutive-max-penalty: 120s +``` + +More information on circuit breaking is available in its documentation, at +.
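Putting those annotations together, a minimal sketch of a Service with circuit breaking enabled might look like this; the `color` Service in the `faces` namespace is reused from the routing examples above, and the port is a placeholder:

```yaml
# Sketch only: the color Service and its port are placeholders taken from
# the earlier examples; the annotations are the circuit-breaking settings
# described above.
apiVersion: v1
kind: Service
metadata:
  name: color
  namespace: faces
  annotations:
    balancer.linkerd.io/failure-accrual: consecutive
    balancer.linkerd.io/failure-accrual-consecutive-max-failures: "4"
    balancer.linkerd.io/failure-accrual-consecutive-min-penalty: "30s"
    balancer.linkerd.io/failure-accrual-consecutive-max-penalty: "120s"
spec:
  selector:
    app: color
  ports:
    - port: 80
```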
+ +## Gotchas + +The biggest gotcha of them all is that in Linkerd 2.13, **ServiceProfiles do +not compose with dynamic request routing and circuit breaking**. + +Getting specific, this means that when a ServiceProfile defines routes, it +takes precedence over other HTTPRoutes with conflicting routes, and it also +takes precedence over circuit breakers associated with the workloads +referenced in the ServiceProfile. This is expected to be the case for the +foreseeable future, to minimize surprises when upgrading from a version of +Linkerd without the new features. + +The challenge here, of course, is that there are still several things that +require ServiceProfiles in Linkerd 2.13 (for example, retries and timeouts). +The Linkerd team is actively working to quickly make all of this better, with +a particular short-term focus on rapidly bringing HTTPRoutes to feature parity +with ServiceProfiles. + +### Debugging Dynamic Request Routing and Circuit Breaking + +The most typical failure you'll see when trying to use these new features is +to enable a new feature and see that it doesn't seem to be active. There are +some simple rules of thumb for debugging: + +- First, *check for ServiceProfiles*. Remember that conflicting + ServiceProfiles will always disable HTTPRoutes or circuit breakers. + +- Second, you may need to restart Pods after removing conflicting + ServiceProfiles. This is because the Linkerd proxy needs to determine + whether it is running in 2.12 mode or 2.13 mode, and in some situations it's + still possible for it not to shift between modes smoothly. + +- Finally, there's a new `linkerd diagnostics policy` command, which will dump + a large amount of internal Linkerd state describing what exactly the control + plane is doing with routing. It's _extremely_ verbose, but can show you an + enormous amount of information that can help with debugging problems. + +## Dynamic Request Routing and Circuit Breaking + +Taken together, dynamic request routing and circuit breaking are two important +new additions to Linkerd 2.13. While still a bit limited in 2.13, keep an eye +out: we have big plans for these features as Linkerd's development continues. + +---- + +_If you want more on this topic, check out the [Circuit Breaking and Dynamic +Request Routing Deep +Dive](https://buoyant.io/service-mesh-academy/circuit-breaking-and-dynamic-routing-deep-dive/) +Service Mesh Academy workshop for hands-on exploration of everything I've +talked about here! And, as always, feedback is always welcome -- you can find +me as `@flynn` on the [Linkerd Slack](https://slack.linkerd.io)._ diff --git a/linkerd.io/content/blog/2023/0621-edge-roundup.md b/linkerd.io/content/blog/2023/0621-edge-roundup.md new file mode 100644 index 0000000000..c5a3a564fc --- /dev/null +++ b/linkerd.io/content/blog/2023/0621-edge-roundup.md @@ -0,0 +1,99 @@ +--- +author: 'matei' +date: 2023-06-21T00:00:00Z +title: |- + Linkerd Edge Release Roundup: 21 June 2023 +url: + /2023/06/20/linkerd-edge-roundup/ +thumbnail: '/uploads/2023/06/roundup-clocks-square.png' +featuredImage: '/uploads/2023/06/roundup-clocks-rect.png' +tags: [Linkerd, linkerd, gitops, flux, flagger] +featured: false +--- + +{{< fig + alt="21 June Linkerd Edge Release Roundup" + src="/uploads/2023/06/roundup-clocks-rect.png" >}} + +Linkerd’s edge releases are a big part of our development process that we’re +going to start talking more about – and so far in June, we’ve done a couple of +edge releases that we think everyone should definitely know about! 
+ +On June 20th, we released edge-23.6.2, which introduces timeout capabilities +for HTTPRoutes following the standard proposed in Gateway API +[GEP-1742]. It also includes a +host of small bugfixes and two fixes from community members: + +- `linkerd check` won't skip validation checks when Linkerd is installed with + HA mode using Helm. Thanks [Takumi Sue]! + +- Allow specifying the Docker builder to use when building multi-arch Docker + artifacts. Thanks [Mark Robinson]! + +And on June 13th, we released edge-23.6.1. This edge release switched the +Linkerd CNI plugin so that it always runs in chained mode to reduce startup +races, as well as bringing in two more community fixes: + +- Topology-aware service routing can now be correctly turned off while still +under load. Thanks again, [Mark Robinson]! + +- Last but not least, support specifying a `logFormat` in the multi-cluster + Link Helm Chart. Thanks, [Arnaud Beun]! + +As always, you can install the latest edge release by running: + +```bash +curl --proto '=https' --tlsv1.2 -sSfL https://run.linkerd.io/install-edge | sh +``` + +[Takumi Sue]: https://github.com/mikutas +[Mark Robinson]: https://github.com/MarkSRobinson +[Arnaud Beun]: https://github.com/bunnybilou + +## GEP-1742 timeout support (edge-23.6.2) + +**G**ateway API **E**nhancement **P**roposals - GEPs - are part of the formal +process for modifying the Gateway API, and [GEP-1742] introduces the ability +to specify two kinds of timeouts in an HTTPRoute. Timeouts are an important +feature for reliability, so Linkerd has been following - and participating +in - this GEP with great interest. + +Since it’s still in the “Provisional” state at present, it’s possible that we +may need to make changes here before the next stable Linkerd release, but it’s +far enough along (see Gateway API [PR#1997]) that we think it’s worth +implementing to let people try out. Let us know how it works for you! + +[GEP-1742]: https://gateway-api.sigs.k8s.io/geps/gep-1742/ +[PR#1997]: https://github.com/kubernetes-sigs/gateway-api/pull/1997 + +## CNI changes (edge-23.6.1) + +A big part of what Linkerd has to handle at startup is setting up the network +to allow Linkerd to route within the mesh, which sometimes means using the +Linkerd CNI plugin. CNI plugins are tricky, especially when it comes to +ordering (as you learned if you watched the [Service Mesh Academy episode +about startup]!), and we’ve run across a few situations where race conditions +with the CNI plugin could result in problems. + +To combat these race conditions, we’ve switched our CNI plugin to only use +chained mode. Instead of letting the Linkerd CNI plugin create a CNI +configuration if it doesn’t find one, the plugin will now always wait for some +other part of the CNI chain to create the configuration first. This makes it +much less likely that some other CNI plugin will accidentally overwrite +Linkerd’s configuration, regardless of the CNI plugin provider that’s used. + +## How to give feedback + +We would be delighted to hear how these releases work out for you! The full +changelogs are at + and +, and we’d love +to hear your feedback on [Slack](https://slack.linkerd.io) or at the new +[Buoyant Linkerd Forum](https://linkerd.buoyant.io). Looking forward to +hearing from you – happy meshing!! + +[Service Mesh Academy episode about startup]: https://buoyant.io/service-mesh-academy/what-really-happens-at-startup + +---- + +_Linkerd generally does new edge releases weekly; watch this space to keep up-to-date. 
Feedback on this blog series is welcome! Just ping `@flynn` on the [Linkerd Slack](https://slack.linkerd.io)._ diff --git a/linkerd.io/content/blog/2023/0713-linkerd-in-production.md b/linkerd.io/content/blog/2023/0713-linkerd-in-production.md new file mode 100644 index 0000000000..fa56c8785e --- /dev/null +++ b/linkerd.io/content/blog/2023/0713-linkerd-in-production.md @@ -0,0 +1,318 @@ +--- +author: 'flynn' +date: 2023-07-13T00:00:00Z +title: |- + Workshop recap: Running Linkerd in Production +url: + /2023/07/13/linkerd-in-production/ +thumbnail: '/uploads/2023/07/jan-huber-0xNbk7D_s6U-square.jpg' +featuredImage: '/uploads/2023/07/jan-huber-0xNbk7D_s6U-rect.jpg' +tags: [Linkerd, linkerd, helm, production, high availability, debug, debugging, alerts, alerting, monitoring] +featured: false +--- + +{{< fig + alt="Airliner Cockpit" + title="image credit: [Jan Huber](https://unsplash.com/@jan_huber?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)" + src="/uploads/2023/07/jan-huber-0xNbk7D_s6U-rect.jpg" >}} + +_This blog post is based on a workshop that I delivered at Buoyant’s [Service +Mesh Academy](https://buoyant.io/service-mesh-academy). If this seems +interesting, check out the [full +recording](https://buoyant.io/service-mesh-academy/linkerd-in-production-101)!_ + +Linkerd is used in a great many demanding production environments around the +world. Let's take a look at what separates demo environments from production, +and what you need to know to be able to run Linkerd in production and still +sleep through the night. + +## Demo vs Production + +Let's start by clarifying the really important differences between demos and +production. These honestly have less to do with the technology itself, and more +to do with the impact of failures. + +Demo environments are often running in local clusters, and they rarely last +very long. They tend not to really prioritize security and careful +configuration management: after all, if anything goes wrong with the demo +environment, usually it's only a minor irritation. They often just aren't +worth a lot of effort, and the way they're created and set up reflects that. + +Production is different, though. In production, you're likely to be using a +relatively long-lived cluster from a cloud provider, and downtime is likely to +affect your users and your business. Production places a premium on +_stability_: you don't want surprises and you don't want downtime. In turn, +this means security is much more important in production, and it also means +that you really don't want anyone changing things in the production +environment in an uncontrolled way. + +## Productionalization Checklist + +Given that background, let's take a look at a checklist for Linkerd in +production: + +1. Think about certificates and the CNI. +2. Put configuration in version control. +3. Run in high availability mode. +4. Use your own image registry. +5. Understand how to install and upgrade Linkerd. +6. Lock down access wherever you can. +7. Set up monitoring and alerting. +8. Understand how to debug Linkerd. + +You'll note that most of these have nothing to do with your application, but +instead are dealing with broad recommendations for how you set up and run +Linkerd to make day-to-day operations as trouble-free as possible. + +### 1. 
Think carefully about certificates and the CNI + +These two things are at the top of the list because they're the basis on which +a lot of the rest of Linkerd functions, so it's a great idea to decide how you +want to approach them before installing Linkerd. (You can still change them +after the fact, but you'll need to be careful to avoid downtime.) + +#### Certificates + +Expired certificates are **the** most common reason for a production Linkerd +installation to take downtime, which is a shame because they're 100% +avoidable. There are two major things to consider here: + +- Ideally, **your trust anchor secret key should not be stored on your cluster + at all**. Linkerd only needs access to the public key of the trust anchor, and + keeping the private key out of the cluster makes it that much harder for + evildoers to find it and use it. + +- You should absolutely have automated certificate rotation for the Linkerd + issuer certs at a minimum. **We recommend cert-manager for automated + rotation**: it's worked well for us, and it provides several mechanisms that + help you with the goal of keeping the trust anchor secret key out of the + cluster. + +There's a [Cloud Native Live on certificate management with +Linkerd](https://www.youtube.com/watch?v=TC665X-uzcQ) where we dive into the +details here, but the basic takeaway is that it's a really good idea to use +cert-manager to keep your trust anchor secret outside the cluster, and to +fully automate rotating the issuer certificates every 48 hours or so. The +combination can let you sidestep a lot of production issues while still +keeping things safe. + +#### The CNI + +The **C**ontainer **N**etworking **I**nterface is the chunk of Kubernetes that +Linkerd needs to interact with in order to configure the network for mesh +routing. Linkerd can do this either with an init container, or with the +Linkerd CNI plugin. We recommend **using the init container if you can**. + +You can get a lot more detail in the [SMA on the Linkerd startup +process](https://buoyant.io/service-mesh-academy/what-really-happens-at-startup), +but our reasoning here is that though the CNI plugin is flexible and powerful, +there are also more ways that things can go sideways when using it, so we +prefer the init container. On the other hand, if your Kubernetes runtime +doesn't allow providing Linkerd with the NET_ADMIN and NET_RAW capabilities, +the init container won't work and the CNI plugin is the way to go. + +### 2. Put configuration in version control + +Another common source of problems in production – in general, not just with +Linkerd – is uncontrolled changes made to a production cluster. If you don't +know what's actually running in the cluster, keeping it running correctly is +impossible. Fortunately, it's not hard to sidestep this issue: just **put your +configuration under version control** using Git or some similar tool. + +Note that this is not to say that you must fully adopt GitOps (though you +should at least consider it). While GitOps can be incredibly useful (there's a +whole [SMA on GitOps and +Linkerd](https://buoyant.io/service-mesh-academy/real-world-gitops-with-flagger-and-linkerd)), +the fact is that you can get an enormous amount of benefit just by using Helm +and checking your `values.yaml` into Git. + +(Why Helm? Well, it's very unlikely that the YAML that we ship with Linkerd +will be exactly right for your deployment. 
You should expect to need to make changes -- and in many real-world
scenarios, making those changes in a Helm `values.yaml` that you then keep in
version control is much simpler than maintaining patches or `kustomization`s
for them.)

### 3. Run in high availability mode

Linkerd's **H**igh **A**vailability (HA) mode changes the way Linkerd is
deployed to eliminate single points of failure and ensure maximum
availability for your cluster, so **definitely use HA mode in production**.

In HA mode, Linkerd deploys three replicas of each control plane component to
ensure that no single control-plane component failure can take down your
entire control plane. It also provides resource limits for the control-plane
components to help out the Kubernetes scheduler; you are strongly encouraged
to check the resource limits and make sure that they are appropriate for your
application.

HA mode also adds a strict requirement that Linkerd's proxy-injector be fully
operational before any other pods can start, in order to prevent early pods
from accidentally starting without mTLS. This is implemented using an
admission webhook, so it is _critical_ that you label the `kube-system`
namespace with `config.linkerd.io/admission-webhooks=disabled`: this prevents
a deadlock where Linkerd is waiting for Kubernetes to be fully running, but
Kubernetes is waiting for the Linkerd admission webhook!

Finally, note that HA mode _requires_ that each of the three control-plane
replicas run on a different Node, which means that your cluster must have at
least three Nodes to use HA mode. (This is the reason why HA mode isn't the
default: it won't work on single-Node demo clusters.)

For more details about Linkerd's HA mode, check out the [Linkerd HA mode
documentation](https://linkerd.io/2.13/features/ha/).

### 4. Use your own image registry

Another critical consideration when preparing Linkerd for production use is
managing your images. Linkerd's released images are published to a single
registry, the GitHub Container Registry (GHCR). While this usually works just
fine, it means that Pods won't be able to start if GHCR becomes unavailable
or unreachable.

The simplest way to mitigate this risk is to **run your own image registry**,
putting the availability of your images under your direct control. This often
sounds daunting, but it's actually not that hard: the Linkerd [private Docker
registry
documentation](https://linkerd.io/2.13/tasks/using-a-private-docker-repository/)
covers exactly how to get things set up.

### 5. Understand how to install and upgrade Linkerd

For production use, we recommend using Helm to install Linkerd, and we
recommend using HA mode. This makes it critical to understand how to actually
use Helm for installation and upgrades.

#### Installation

You'll be using Helm to install Linkerd in HA mode, so you'll need to **grab
the `values-ha.yaml` file from the Helm chart**: run `helm fetch --untar
linkerd/linkerd-control-plane`, then copy
`linkerd-control-plane/values-ha.yaml` into your version control system.
`values-ha.yaml` shouldn't need any edits, but it's worth a read to make sure
of that.

After you've vetted `values-ha.yaml`, you'll run `helm install` with the `-f
path/to/your/values-ha.yaml` option. The [Linkerd documentation on installing
with Helm](https://linkerd.io/2/tasks/install-helm/) goes into much more
detail here.
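To make that concrete, here's a minimal sketch of the install commands. It
assumes the Linkerd Helm repo has already been added as `linkerd`, and that
you've generated the trust anchor and issuer certificates ahead of time as
`ca.crt`, `issuer.crt`, and `issuer.key` (if cert-manager is managing the
issuer for you, you'd configure the issuer values differently, as described
in the certificate discussion earlier):

```bash
helm repo update

# The CRDs ship in their own chart as of Linkerd 2.12.
helm install linkerd-crds linkerd/linkerd-crds \
  --namespace linkerd --create-namespace

# Install the control plane in HA mode, using the vetted values-ha.yaml.
helm install linkerd-control-plane linkerd/linkerd-control-plane \
  --namespace linkerd \
  --set-file identityTrustAnchorsPEM=ca.crt \
  --set-file identity.issuer.tls.crtPEM=issuer.crt \
  --set-file identity.issuer.tls.keyPEM=issuer.key \
  -f path/to/your/values-ha.yaml
```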
#### Upgrades

Linkerd upgrades are usually straightforward, but always read the release
notes and always test in non-production environments. **Upgrade the control
plane first** with `helm upgrade`, then gradually roll out data-plane
upgrades by restarting workloads and allowing the control plane to inject the
new version of the proxy. (There are more details on this process in the
[Linkerd upgrade documentation](https://linkerd.io/2/tasks/upgrade/).)

Order matters here: doing the control plane first is always supported, as the
data plane is designed to handle the temporary skew – but **don't skip major
versions** when upgrading. Going from 2.10.2 to 2.11.1 to 2.12.3 is fine;
going directly from 2.10.2 to 2.12.3 is not supported.

It's also worth pointing out the `--reuse-values` and `--reset-values` Helm
flags. Basically, `--reuse-values` tells Helm to use the values from your
previous installation, while `--reset-values` tells Helm to use the values
from the new chart instead. (Command-line overrides take effect in all
cases.)

(Hopefully you'll never need to downgrade Linkerd, but if you do, the process
is exactly the same as an upgrade -- control plane first, then data plane.
And since Helm doesn't have a specific command for it, you actually do it by
running `helm upgrade` with `--version` set to the older version. Having your
configuration in version control really shines in this case, too.)

### 6. Lock down access wherever you can

Once installed, pay attention to access controls. In a Linkerd mesh, each of
the several microservices that work together to form your cloud-native
application has its own identity, which permits Linkerd to provide very
fine-grained authorization controls for communications between microservices.
For instance, does the microservice that serves your UI's HTML really need
access to check a user's bank balance? Most likely not.

You can improve the security of your application as a whole by following the
principle of least privilege: **don't allow access that microservices don't
need**. There's an [SMA on policy management with
Linkerd](https://buoyant.io/service-mesh-academy/a-deep-dive-into-route-based-policy)
which goes into great detail on this, but the simplest approach is to
restrict access to entire namespaces initially, then progressively add routes
as necessary. A useful tool to start with is the `linkerd viz profile --tap`
command, which can generate a ServiceProfile based on observed traffic.

### 7. Set up monitoring and alerting

The last piece that you'll definitely need is effective monitoring and
alerting. **Do not use the Prometheus installed by `linkerd viz` in
production**: the right home for your production metrics is off-cluster,
which might mean using [Buoyant Cloud](https://buoyant.io/cloud), a metrics
provider like [Datadog](https://datadog.io), or your own [external
Prometheus](https://linkerd.io/2.13/tasks/external-prometheus/).

You should **set up alerts for both the control plane and the data plane**.
Your control plane components should basically always show 100% success:
anything lower should be investigated. Alerts for latency and resource usage
can be extremely helpful, but you'll need to determine the right thresholds
empirically for your situation.

Finally, don't forget to **set up alerts for TLS certificate expirations**!
This is a simple measure that can save you an enormous amount of pain.
### 8. Understand how to debug Linkerd

With a little luck, you'll never need to debug Linkerd, but if you do, it's
important to remember that at its core, **debugging Linkerd is just debugging
a Kubernetes workload**, albeit a complex one. This means that the usual
`kubectl` commands are still helpful, as are several commands from the
`linkerd` CLI:

- `kubectl events`: Use this for any Pod state other than Running.
- `kubectl logs`: This command will allow you to view container logs.
- `kubectl get`: This shows status information, particularly relevant for
  Gateway API resources.
- `linkerd check`: This command validates that Linkerd is functioning
  correctly.
- `linkerd diagnostics proxy-metrics po/<pod> -n linkerd`: This provides
  metrics for the proxy running in the specified pod.
- `linkerd viz tap`: This helps observe requests as they flow through your
  application.
- `linkerd identity`: This provides information about the mTLS certificates
  of a workload.
- `linkerd authz` and `linkerd diagnostics policy`: These are new commands
  introduced in versions 2.11 and 2.13, respectively, and are helpful for
  policy troubleshooting.

If you're looking into the logs, you might need to raise the log level. This
gets a little complex:

- For the proxies and the controllers in the control plane, you can change
  the log level globally by editing the `logLevel` values found in the Helm
  chart.
- For the proxy, you can annotate individual workloads or namespaces, for
  example `config.linkerd.io/proxy-log-level: warn,linkerd2_proxy=trace`. See
  the [documentation on setting the proxy's log
  level](https://linkerd.io/2.13/tasks/modifying-proxy-log-level/) for more
  options.
- For individual controllers, you can modify the [Helm
  templates](https://github.com/linkerd/linkerd2/blob/main/charts/linkerd-control-plane/templates/)
  for the desired controller and reinstall.
- Note that the policy controller and the proxy use Rust log levels (like
  `linkerd=info,warn`), while the other controllers use Golang formats (like
  `error`).

Last but not least, there's the Linkerd debug sidecar, which comes equipped
with `tshark`, `tcpdump`, `lsof`, and `iproute2`. If your runtime allows it,
it can be very useful for debugging: check out the [documentation on using
the debug sidecar](https://linkerd.io/2/tasks/using-the-debug-container) for
the details.

## Linkerd in Production

There's a lot of text and references above, but what you'll find going
through it all is that it's really not difficult to get a rock-solid Linkerd
installation running in demanding real-world production environments.
Linkerd's ability to easily provide security, reliability, and observability,
while maintaining operational simplicity and performance, makes a world of
difference in this kind of application.

If you found this interesting, check out the Service Mesh Academy workshop on
[Linkerd in Production 101: updated for
2.13](https://buoyant.io/service-mesh-academy/linkerd-in-production-101) for
hands-on exploration of everything I've talked about here! And, as always,
feedback is welcome -- you can find me as `@flynn` on the [Linkerd
Slack](https://slack.linkerd.io).
diff --git a/linkerd.io/content/blog/2023/0720-flat-networks.md b/linkerd.io/content/blog/2023/0720-flat-networks.md new file mode 100644 index 0000000000..10169f70ac --- /dev/null +++ b/linkerd.io/content/blog/2023/0720-flat-networks.md @@ -0,0 +1,146 @@

---
author: 'william'
date: 2023-07-20T00:00:00Z
title: |-
  Enterprise multi-cluster at scale: supporting flat networks in Linkerd
thumbnail: '/uploads/2023/07/nasa-_SFJhRPzJHs-unsplash.jpg'
featuredImage: '/uploads/2023/07/nasa-_SFJhRPzJHs-unsplash.jpg'
tags: [Linkerd]
---

{{< fig
  alt="An image of Manhattan at night, shot from the atmosphere"
  src="/uploads/2023/07/nasa-_SFJhRPzJHs-unsplash.jpg" >}}

Linkerd has seen a steady rise in enterprise adoption, with companies like
[Adidas](https://buoyant.io/case-studies/adidas),
[Microsoft](https://buoyant.io/case-studies/xbox),
[Plaid](https://www.cncf.io/blog/2023/07/17/plaid-pain-free-deployments-at-global-scale/),
and [DB Schenker](https://buoyant.io/case-studies/schenker) deploying Linkerd
at scale to bring security, compliance, and reliability to their
mission-critical production infrastructure. Based on feedback from this
enterprise audience, the upcoming Linkerd 2.14 release will introduce a new
set of features designed to handle the types of multi-cluster Kubernetes
configurations commonly found in enterprise deployments.

One of the most important new features in the next Linkerd release will be an
improved ability to handle multi-cluster communication in environments with a
shared flat network between clusters. In this blog post, I'll talk about what
that means and why it's important.

## How does Linkerd handle multi-cluster today?

If you're using multiple Kubernetes clusters and want them to communicate
with each other, Linkerd gives you the ability to send traffic across cluster
boundaries that is:

1. **Fully secured**. This means that traffic between clusters is encrypted,
   authenticated, and authorized using mutual TLS, workload identities (not
   network identities!), and Linkerd's fine-grained, [zero-trust authorization
   policies](https://linkerd.io/2/features/server-policy/).
2. **Transparent to the application.** This means that the application is
   totally decoupled from cluster topology, which allows the operator to take
   advantage of powerful networking capabilities such as [dynamically failing
   over traffic to other
   clusters](https://linkerd.io/2/tasks/automatic-failover/).
3. **Observable and reliable**. Linkerd's powerful L7 introspection and
   reliability mechanisms, including golden metrics, retries, timeouts,
   distributed tracing, circuit breaking, and more, are all available to
   cross-cluster traffic just as they are to on-cluster traffic.

Linkerd has supported multi-cluster Kubernetes deployments since the release
of Linkerd 2.8 in 2020. That release introduced [a simple and elegant
design](https://linkerd.io/2.13/features/multicluster/) that involves the
addition of a service mirror component to handle service discovery, and a
multi-cluster gateway component to handle traffic from other clusters.

This gateway design allowed Linkerd's multi-cluster support to be entirely
independent of the underlying network topology. Whether your clusters are
colocated in the same datacenter, split across different cloud providers, or
deployed in a hybrid fashion between on-premises and cloud deployments,
Linkerd worked the same way.

This design has worked well!
However, as Kubernetes adoption has grown in enterprise environments, we've
seen a growing number of cases where clusters are deployed in a shared _flat
network_. In this situation, we can make some significant optimizations by
removing the gateway.

## Multi-cluster for flat networks

In a shared flat network situation, pods in different Kubernetes clusters can
route traffic directly to each other. In other words, a pod in cluster 1 can
establish a TCP connection to a pod in cluster 2, just using the underlying
network.

If pods are routable, why use Linkerd? For exactly the same reasons you're
using it within the cluster: to provide security, reliability, and
observability guarantees beyond what a baseline TCP connection provides.

In Linkerd 2.14, we'll introduce an additional mode of multi-cluster
communication designed for shared flat networks: direct pod-to-pod
communication between clusters, without the gateway intermediary.

{{< fig
  alt="An architectural diagram comparing hierarchical network mode with the new flat network mode"
  src="/uploads/2023/07/flat_network@2x.png">}}

In this approach, as you might imagine, Linkerd will route communication from
a pod on the source cluster directly to the destination pod on another
cluster without transiting the gateway. This provides several advantages,
including:

* **Improved latency** of cross-cluster calls by removing the additional hop
  between client and server.
* **Improved security** by preserving workload identity in mTLS calls across
  clusters, rather than overriding it with the gateway identity.
* **Reduced cloud spend** by reducing the amount of traffic that is routed
  through the multi-cluster gateway, which is often implemented as a cloud
  load balancer.

This approach still preserves two critical aspects of Linkerd's multi-cluster
design:

1. **Separation of failure domains.** Each Kubernetes cluster runs its own
   Linkerd control plane, independently of other clusters, and the failure of
   a single cluster cannot take down the service mesh on other clusters.
2. **Standardized, uniform architecture.** Unlike other solutions that split
   L7 logic between complex proxies operating at different levels and scopes,
   Linkerd's Rust-based "micro-proxy" sidecars are the sole mechanism for
   controlling traffic between pods and clusters, giving you a single
   operational surface area to monitor and manage, with clear isolation of
   failure and security domains.

Finally, this approach improves Linkerd's ability to provide a uniform layer
of authentication across your entire environment, and to enforce granular
authorization policies, aka "micro-segmentation". Because the gateway is no
longer an intermediary, cross-cluster connections retain the workload
identity of the source, and authorization policies can be crafted to take
advantage of these identities directly.

(For Kubernetes experts, note that this implementation is inspired by, and
loosely aligns with, the [Multi-Cluster Services API proposal
(KEP-1645)](https://github.com/kubernetes/enhancements/tree/master/keps/sig-multicluster/1645-multi-cluster-services-api).
While strict conformance with this KEP is not currently a goal, we look
forward to seeing how that proposal evolves.)

## So when do we get this amazing new feature?

Linkerd 2.14 will be shipping next month.
With the addition of pod-to-pod communication, we're confident that Linkerd
will continue to be the simplest way to connect multiple Kubernetes clusters,
now including deployments that can make use of flat networks.

## Linkerd is for everyone

Linkerd is a [graduated project](/2021/07/28/announcing-cncf-graduation/) of
the [Cloud Native Computing Foundation](https://cncf.io/). Linkerd is
[committed to open
governance](https://linkerd.io/2019/10/03/linkerds-commitment-to-open-governance/).
If you have feature requests, questions, or comments, we'd love to have you
join our rapidly-growing community! Linkerd is hosted on
[GitHub](https://github.com/linkerd/), and we have a thriving community on
[Slack](https://slack.linkerd.io/), [Twitter](https://twitter.com/linkerd),
and the [mailing lists](https://linkerd.io/2/get-involved/). Come and join
the fun!

(*Photo by
[NASA](https://unsplash.com/@nasa?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)
on
[Unsplash](https://unsplash.com/photos/_SFJhRPzJHs?utm_source=unsplash&utm_medium=referral&utm_content=creditCopyText)*)

diff --git a/linkerd.io/content/blog/2023/0725-linkerd-hero.md b/linkerd.io/content/blog/2023/0725-linkerd-hero.md new file mode 100644 index 0000000000..65fea6614d --- /dev/null +++ b/linkerd.io/content/blog/2023/0725-linkerd-hero.md @@ -0,0 +1,56 @@

---
author: catherine
date: 2023-07-25T00:00:00Z
title: Announcing the July 2023 Linkerd Hero!
description: |-
  The votes are in! This month's Linkerd Hero is Mikael Fridh for fixing an
  incompatibility between Linkerd's CNI plugin and newer versions of EKS's
  VPC CNI plugin.
url:
  /2023/07/25/announcing-the-july-2023-linkerd-hero/
featuredImage: '/uploads/2023/07/mikael-fridh-featured.png'
thumbnail: '/uploads/2023/07/mikael-fridh-hero-square.png'
tags:
  - Community
  - Hero
items:
  -
keywords: [hero, community, contributor, code]
---

## Announcing July's Linkerd Hero

We are excited to announce this month's Linkerd Hero: Mikael Fridh.
Congrats, Mikael!

## What are Linkerd Heroes?

Linkerd Heroes are community members who best represent the spirit of the
Linkerd community. Whether it's helping others, answering questions, sharing
their successes (and failures!) with the world at large, or contributing code
or docs, Linkerd Heroes are instrumental in making the Linkerd community the
amazing place it is today.

## Congrats, Mikael!

This month's hero is [Mikael Fridh](https://www.linkedin.com/in/mikaelfridh/).
Mikael reported an incompatibility between Linkerd's CNI plugin and newer
versions of EKS's VPC CNI plugin. He demonstrated great domain insight by
quickly providing a succinct fix that we immediately incorporated into the
2.12 and 2.13 branches of Linkerd. Thank you, Mikael, for helping us fix
issues rapidly and improve everyone's Linkerd experience!

![Mikael Fridh](/uploads/2023/07/mikael-fridh-hero.png)

## Nominate next month's Linkerd Hero

Linkerd Heroes take many forms. Perhaps someone has answered your or the
community’s pressing questions on Slack. Maybe you've read a blog post or
watched a conference talk that helped spur your decision-making process or
advance your Linkerd implementation. Or perhaps someone contributed an awesome
Linkerd feature or bugfix that has made your life a lot easier.

Who is your Linkerd Hero?
[Submit your nomination for next month's hero
here!](https://docs.google.com/forms/d/e/1FAIpQLSfNv--UnbbZSzW7J3SbREIMI-HaooyX9im8yLIGB7M_LKT_Fw/viewform?usp=sf_link)

For those who nominated someone, thank you for participating!

diff --git a/linkerd.io/content/blog/2023/0807-edge-release-roundup.md b/linkerd.io/content/blog/2023/0807-edge-release-roundup.md new file mode 100644 index 0000000000..60626e7eff --- /dev/null +++ b/linkerd.io/content/blog/2023/0807-edge-release-roundup.md @@ -0,0 +1,188 @@

---
author: 'alejandro'
date: 2023-08-07T00:00:00Z
title: |-
  Linkerd Edge Release Roundup: August 2023
url:
  /2023/08/07/linkerd-edge-roundup/
thumbnail: '/uploads/2023/06/roundup-clocks-square.png'
featuredImage: '/uploads/2023/06/roundup-clocks-rect.png'
tags: [Linkerd, linkerd, edge, release, roundup]
featured: false
---

{{< fig
  alt="August Linkerd Edge Release Roundup"
  src="/uploads/2023/06/roundup-clocks-rect.png" >}}

Linkerd’s edge releases are a big part of our development process, and there
have been a lot of them - five! - since our last edge-release roundup. The
plan is to do these roundups more frequently to keep things manageable, but
for this one, we'll hit some highlights and then do a release-by-release list
at the end.

## Community Contributions

We couldn't do what we do without the Linkerd community, and this batch of
releases is definitely no exception. Huge thanks to [@hiteshwani29], [Abhijeet
Gaurav], [Grégoire Bellon-Gervais], [Harsh Soni], [Jean-Charles Legras], and
[Miguel Elias dos Santos] for their contributions across a wide range of
areas, from the Linkerd CLI to host networking! You'll find more information
about all of these contributions in the release-by-release details below.

[@hiteshwani29]:https://github.com/hiteshwani29
[Abhijeet Gaurav]:https://github.com/abhijeetgauravm
[Grégoire Bellon-Gervais]:https://github.com/albundy83
[Harsh Soni]:https://github.com/harsh020
[Jean-Charles Legras]:https://github.com/jclegras
[Miguel Elias dos Santos]:https://github.com/migueleliasweb

## Gateway API

From the feature perspective, our main focus over the last several edge
releases has been improving our Gateway API support, bringing us closer to
feature parity between [HTTPRoutes] and [ServiceProfiles]:

- We added support for the Gateway API's `gateway.networking.k8s.io` APIGroup
  to Linkerd in edge-23.7.1 on July 7th. This is a major step toward
  conformance with the Gateway API's [Mesh profile]. (We're not turning off
  support for `policy.linkerd.io`, though; that's still quite a ways away.)

- We added support for HTTPRoutes defined in the namespace from which a route
  is called in edge-23.7.3 on July 28th. The Gateway API calls these
  [_consumer routes_][consumer-routes], since the use case is usually doing
  things like overriding the timeout for a workload you're calling. You can
  learn more about this in the [Gateway API Mesh routing
  documentation][gamma-routing].

- We also made HTTPRoute `parentRefs` port numbers optional in edge-23.7.3,
  per the [HTTPRoute standard].

- Finally, we started adding support for [HTTPRoute filters]:
  `RequestHeaderModifier` and `RequestRedirect` are supported in edge-23.7.2,
  and `ResponseHeaderModifier` is supported in edge-23.7.3 (so edge-23.7.2
  added header modifications for _requests_, and edge-23.7.3 added header
  modifications for _responses_). There's a short illustrative sketch of a
  header-modifier route just after this list.
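As a rough sketch of what one of these filters looks like in practice (the
Service, namespace, and header below are made up for illustration, and the
exact `apiVersion`s accepted depend on which edge release you're running):

```yaml
apiVersion: gateway.networking.k8s.io/v1beta1
kind: HTTPRoute
metadata:
  name: smiley-add-header # illustrative route, Service, and namespace
  namespace: faces
spec:
  parentRefs:
    - name: smiley
      kind: Service
      group: core
      port: 80
  rules:
    - filters:
        - type: RequestHeaderModifier
          requestHeaderModifier:
            set:
              - name: x-request-origin # illustrative header
                value: faces-gui
      backendRefs:
        - name: smiley
          port: 80
```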
[HTTPRoutes]:https://gateway-api.sigs.k8s.io/api-types/httproute/
[HTTPRoute standard]:https://gateway-api.sigs.k8s.io/references/spec/#gateway.networking.k8s.io/v1alpha2.HTTPRoute
[ServiceProfiles]:https://linkerd.io/2.13/features/service-profiles/
[consumer-routes]:https://gateway-api.sigs.k8s.io/concepts/glossary/#consumer-route
[gamma-routing]:https://gateway-api.sigs.k8s.io/concepts/gamma/#how-the-gateway-api-works-for-service-mesh
[Mesh profile]:https://gateway-api.sigs.k8s.io/geps/gep-1686/
[HTTPRoute filters]:https://gateway-api.sigs.k8s.io/references/spec/#gateway.networking.k8s.io/v1beta1.HTTPRouteFilter

## Fixes

Of the many fixes in these five releases, two in particular stand out:

1. In edge-23.7.3, we fixed a race condition where the Linkerd destination
   controller could panic in an environment with high churn of Endpoints or
   Servers. The most common effect here is seeing restarts of the destination
   controller Pods, but it could also result in traffic being sent to the
   wrong destination endpoints.

   This is covered in Linkerd issue [#11163].

2. In edge-23.8.1, we raised the default capacities of the HTTP request queues
   (both inbound and outbound) back to 10,000 after lowering them for Linkerd
   2.13. The effect here is that in situations where a single destination
   workload needed to accept a lot of concurrent traffic, the Linkerd proxies
   would decide that they had too much load and start shedding it by dropping
   connections. This happened much more aggressively in Linkerd 2.13 than in
   Linkerd 2.12; it's fixed in edge-23.8.1.

   This is covered in Linkerd issue [#11055] and PR [#11198].

[#11163]:https://github.com/linkerd/linkerd2/issues/11163
[#11055]:https://github.com/linkerd/linkerd2/issues/11055
[#11198]:https://github.com/linkerd/linkerd2/pull/11198

## Installing the Latest Edge Release

```bash
curl --proto '=https' --tlsv1.2 -sSfL https://run.linkerd.io/install-edge | sh
```

## How to give feedback

We would be delighted to hear how these releases work out for you! The full
changelogs are at , , , , and . We’d love to hear your feedback on
[Slack](https://slack.linkerd.io) or at the new [Buoyant Linkerd
Forum](https://linkerd.buoyant.io). Looking forward to hearing from you –
happy meshing!!

## Release Details

- edge-23.6.3, on June 30th, was all about a couple of community
  contributions:

  - [@hiteshwani29] added JSONpath output to `linkerd viz tap`.

  - [Jean-Charles Legras] fixed a proxy startup failure that could happen with
    the `config.linkerd.io/admin-port` annotation.

- edge-23.7.1, on July 7th, started our Gateway API theme and also made a
  couple of fixes:

  - We added support for the Gateway API's `gateway.networking.k8s.io`
    APIGroup to Linkerd (a major step toward conformance with the Gateway
    API's [Mesh profile]).

  - We fixed a problem where the ingress-mode proxy wouldn't always correctly
    use ServiceProfiles for destinations with no HTTPRoutes.

  - We added distinguishable version information to the proxy's logs and
    metrics.

- edge-23.7.2, on July 13th, continued the Gateway API theme and pulled in a
  community fix:

  - We added support for HTTPRoute's `RequestHeaderModifier` and
    `RequestRedirect` [filters][HTTPRoute filters].

  - [Miguel Elias dos Santos] fixed a `linkerd-cni` chart problem that could
    block the CNI pods from coming up when the injector was broken.
+ +- edge-23.7.3, on July 28th, was our largest edge release: it had a lot of + Gateway API work and several fixes. + + - We made HTTPRoute `parentRefs` port numbers optional, per the [HTTPRoute + standard]. + + - We added support for Gateway API [_consumer routes_][consumer-routes]. + + - We added support for HTTPRoute's `ResponseHeaderModifier` + [filter][HTTPRoute filters]. + + - [Grégoire Bellon-Gervais] fixed a Grafana error caused by an incorrect + datasource. + + - [Harsh Soni] fixed the linkerd extension CLI commands so that they prefer + the `--register` flag over the `LINKERD_DOCKER_REGISTRY` environment + variable, for consistency. + + - We fixed a race condition that could cause the destination controller to + panic. + + - We added high-availability mode for the multicluster service mirror, and + further improved control-plane logging. + + - We added support for disabling the network validator security context if + you're in an environment that defines its own security context. + +- Last but not least: edge-23.8.1, on August 3rd, brought in a couple of + very important bugfixes: + + - [Abhijeet Gaurav] made it possible to use the `linkerd-cni` DaemonSet + without needing host networking support. + + - We raised the default capacities of the HTTP request queues back to + 10,000. + +---- + +_Linkerd generally does new edge releases weekly; watch this space to keep up-to-date. Feedback on this blog series is welcome! Just ping `@flynn` on the [Linkerd Slack](https://slack.linkerd.io)._ diff --git a/linkerd.io/content/blog/_index.md b/linkerd.io/content/blog/_index.md index f592c71da6..d08d694927 100644 --- a/linkerd.io/content/blog/_index.md +++ b/linkerd.io/content/blog/_index.md @@ -7,8 +7,8 @@ tags: - Tutorials & How-To's - Video items: - - blog/2023/0526-osm-migration.md - - blog/2023/0515-real-world-gitops.md -description: '' + - blog/2023/0720-flat-networks.md + - blog/2023/0713-linkerd-in-production.md +description: Read the latest blog posts covering the Linkerd service mesh, from technical tutorials to announcements to what’s next on the roadmap. keywords: [] --- diff --git a/linkerd.io/content/blog/a-service-mesh-for-ecs.md b/linkerd.io/content/blog/a-service-mesh-for-ecs.md deleted file mode 100644 index aa716de568..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-ecs.md +++ /dev/null @@ -1,273 +0,0 @@ ---- -slug: 'a-service-mesh-for-ecs' -title: 'A Service Mesh For ECS' -aliases: - - /2017/08/08/a-service-mesh-for-ecs/ -author: 'andrew' -date: Tue, 08 Aug 2017 15:08:30 +0000 -draft: false -thumbnail: /uploads/linkerd_featured.png -featured: false -tags: - [ - aws, - ECS, - Linkerd, - linkerd, - service mesh, - tutorials, - Tutorials & How-To's, - ] ---- - -Linkerd, our open source service mesh for cloud native applications, adds reliability and visibility to microservices by managing all of the internal communication between services. Deployed as a set of transparent layer 5/7 proxies, the Linkerd service mesh provides a consistent, global layer for monitoring and controlling all internal, service-to-service traffic within an application. (For more on the service mesh model, read William’s article, [What's a service mesh? And why do I need one?]({{< relref "whats-a-service-mesh-and-why-do-i-need-one" >}})) - -One of Linkerd’s strengths is its ability to integrate with many different environments (and to allow you to bridge environments!). 
In previous posts, we’ve covered how to use Linkerd with [Kubernetes][part-i] and [DC/OS](https://buoyant.io/2016/04/19/linkerd-dcos-microservices-in-production-made-easy/). In this post, we describe how to use Linkerd with Amazon ECS. - -All commands and config files referenced in this post may be found in the [linkerd-examples repo](https://github.com/linkerd/linkerd-examples/tree/master/ecs). - -## Overview - -This post will show you how to set up Linkerd as a service mesh on ECS, using Consul for service discovery, linkerd-viz for monitoring, and a hello-world sample app, as seen in the diagram below: - -{{< fig - alt="Linkerd: A Service Mesh for ECS" - title="Linkerd: A Service Mesh for ECS" - src="/uploads/2018/05/service-mesh-for-ECS@2x.png" >}} - -## Initial Setup - -This post assumes you have already configured AWS with the proper IAM, key pairs, and VPCs for an ECS cluster. For more information on these topics, have a look at Amazon’s [Setting Up with Amazon ECS guide](http://docs.aws.amazon.com/AmazonECS/latest/developerguide/get-set-up-for-amazon-ecs.html). - -Set a key pair you will use to access your instances, or omit the parameter to forego ssh access: - -```bash -KEY_PAIR=MY_KEY_PAIR_NAME -``` - -Next, create a Security Group: - -```bash -GROUP_ID=$(aws ec2 create-security-group --group-name l5d-demo-sg --description "Linkerd Demo" | jq -r .GroupId) -aws ec2 authorize-security-group-ingress --group-id $GROUP_ID \ - --ip-permissions \ - FromPort=22,IpProtocol=tcp,ToPort=22,IpRanges=[{CidrIp="0.0.0.0/0"}] \ - FromPort=4140,IpProtocol=tcp,ToPort=4140,IpRanges=[{CidrIp="0.0.0.0/0"}] \ - FromPort=9990,IpProtocol=tcp,ToPort=9990,IpRanges=[{CidrIp="0.0.0.0/0"}] \ - FromPort=3000,IpProtocol=tcp,ToPort=3000,IpRanges=[{CidrIp="0.0.0.0/0"}] \ - FromPort=8500,IpProtocol=tcp,ToPort=8500,IpRanges=[{CidrIp="0.0.0.0/0"}] \ - IpProtocol=-1,UserIdGroupPairs=[{GroupId=$GROUP_ID}] -``` - -## Set up Consul - -For demonstration purposes, we run a single Consul server outside of the ECS cluster: - -```bash -aws ec2 run-instances --image-id ami-7d664a1d \ - --instance-type m4.xlarge \ - --user-data file://consul-server-user-data.txt \ - --placement AvailabilityZone=us-west-1a \ - --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=l5d-demo-consul-server}]" \ - --key-name $KEY\_PAIR --security-group-ids $GROUP_ID -``` - -## Set up ECS - -### Create a new cluster - -```bash -aws ecs create-cluster --cluster-name l5d-demo -``` - -### Create a Role Policy - -```bash -aws iam put-role-policy --role-name ecsInstanceRole --policy-name l5dDemoPolicy --policy-document file://ecs-role-policy.json -``` - -### Register Task Definitions - -```bash -aws ecs register-task-definition --cli-input-json file://linkerd-task-definition.json -aws ecs register-task-definition --cli-input-json file://linkerd-viz-task-definition.json -aws ecs register-task-definition --cli-input-json file://consul-agent-task-definition.json -aws ecs register-task-definition --cli-input-json file://consul-registrator-task-definition.json -aws ecs register-task-definition --cli-input-json file://hello-world-task-definition.json -``` - -### Create Launch Configuration - -This step defines a Launch Configuration. We configure our ECS cluster to boot Linkerd and consul on each ECS node. 
- -```bash -aws autoscaling create-launch-configuration \ - --launch-configuration-name l5d-demo-lc \ - --image-id ami-7d664a1d \ - --instance-type m4.xlarge \ - --user-data file://ecs-user-data.txt \ - --iam-instance-profile ecsInstanceRole \ - --security-groups $GROUP_ID \ - --key-name $KEY_PAIR -``` - -Note ecs-user-data.txt dynamically generates config files for each of _Linkerd_, _consul-agent_, and _consul-registrator_, using data specific to the ECS Instance it is running on. - -### Create an Auto Scaling Group - -This step actually creates the EC2 instances, based on the Launch Configuration defined above. Upon completion, we should have two ECS nodes, each running Linkerd, consul-agent, and consul-registrator. - -```bash -aws autoscaling create-auto-scaling-group \ - --auto-scaling-group-name l5d-demo-asg \ - --launch-configuration-name l5d-demo-lc \ - --min-size 1 --max-size 3 --desired-capacity 2 \ - --tags ResourceId=l5d-demo-asg,ResourceType=auto-scaling-group,Key=Name,Value=l5d-demo-ecs,PropagateAtLaunch=true \ - --availability-zones us-west-1a -``` - -We name our instances _l5d-demo-ecs_ so we can programmatically find them later on. - -### Deploy the hello-world sample application - -Now that all our foundational services are deployed, we can deploy a sample app. The _hello-world_ task is composed of a _hello_ service, a _world_ service, and a _world-v2_ service. To demonstrate inter-service communication, we configure the _hello_ service to call the _world_ service via _Linkerd_. - -```bash -aws ecs run-task --cluster l5d-demo --task-definition hello-world --count 2 -``` - -Note that we have deployed two instances of hello-world, which results in two hello containers, two world containers, and two world-v2 containers. - -## Did it work? - -If everything deployed correctly, we should see 8 tasks running in our [ECS dashboard](https://us-west-1.console.aws.amazon.com/ecs/home?region=us-west-1#/clusters/l5d-demo/tasks): - -{{< fig - alt="ECS Tasks" - title="ECS Tasks" - src="/uploads/2018/05/ecs-tasks-1024x589.png" >}} - -We select an arbitrary ECS node, via the _l5d-demo-ecs_ name, then curl the _hello_ service via _Linkerd_: - -```bash -ECS_NODE=\$( \ - aws ec2 describe-instances \ - --filters Name=instance-state-name,Values=running Name=tag:Name,Values=l5d-demo-ecs \ - --query Reservations[*].Instances[0].PublicDnsName --output text \ -) -``` - -Now test routing: - -```bash -$ http_proxy=$ECS_NODE:4140 curl hello -Hello (172.31.20.160) World (172.31.19.35)!! -``` - -If everything worked correctly, we should get a reply from the _hello_ service, with data from the world service. View Linkerd and Consul UIs: - -```bash -# (osx only) -open http://$ECS_NODE:9990 -open http://$ECS_NODE:8500 -``` - -## Test dynamic request routing - -One of Linkerd's most powerful features is dynamic request routing. Here we'll demonstrate routing a single request to the world-v2 service, rather than the default _world_ service: - -```bash -$ http_proxy=$ECS_NODE:4140 curl -H 'l5d-dtab: /svc/world => /svc/world-v2' hello -Hello (172.31.20.160) World-V2 (172.31.19.35)!! -``` - -The request flow we just tested: - -```txt -curl -> linkerd -> hello -> linkerd -> world-v2 -``` - -By setting the _l5d-dtab_ header, we instructed Linkerd to dynamically route all requests destined for _world_ to _world-v2_, even though the request initially transited through the hello service. 
- -{{< fig - alt="Per-request routing with Linkerd" - title="Per-request routing with Linkerd" - src="/uploads/2018/05/per_request_routing@2x.png" >}} - -For more information, have a look at [Dynamic Request Routing](https://linkerd.io/features/routing/). - -### Monitoring the services - -Linkerd instruments all traffic and exposes these metrics, including top-line service metrics like success rates and latencies. By using the Linkerd service mesh, we can automatically collect these valuable metrics without having to modify our application! - -Since Linkerd itself is purely distributed, however, we need to aggregate these results. For convenience, we provide a simple open source package, [linkerd-viz](https://github.com/linkerd/linkerd-viz), which can collect and displays metrics for all Linkerd's running in a cluster. - -Prior to deploying linkerd-viz, let's put some load through our system: - -```bash -while true; do http_proxy=$ECS_NODE:4140 curl -s -o /dev/null hello; done -``` - -Now deploy a single linkerd-viz instance: - -```bash -aws ecs run-task --cluster l5d-demo --task-definition linkerd-viz --count 1 -``` - -Now bring up the _linkerd-viz_ dashboard: - -```bash -# find the ECS node running linkerd-viz -TASK_ID=$( \ - aws ecs list-tasks \ - --cluster l5d-demo \ - --family linkerd-viz \ - --desired-status RUNNING \ - --query taskArns[0] \ - --output text) -CONTAINER_INSTANCE=$( \ - aws ecs describe-tasks \ - --cluster l5d-demo \ - --tasks $TASK_ID \ - --query tasks[0].containerInstanceArn \ - --output text) -INSTANCE_ID=$( \ - aws ecs describe-container-instances \ - --cluster l5d-demo \ - --container-instances $CONTAINER_INSTANCE \ - --query containerInstances[0].ec2InstanceId \ - --output text) -ECS_NODE=$( \ - aws ec2 describe-instances \ - --instance-ids $INSTANCE_ID \ - --query Reservations[*].Instances[0].PublicDnsName \ - --output text) - -# view linkerd-viz (osx only) -open http://$ECS_NODE:3000 -``` - -If everything worked correctly, we should see a dashboard like this: - -{{< fig - alt="ECS linkerd-viz" - title="ECS linkerd-viz" - src="/uploads/2018/05/ecs-linkerd-viz.png" >}} - -## Conclusion - -In the above post, we’ve show how to deploy Linkerd on ECS to provide a service mesh: a dedicated layer for managing and monitoring all service-to-service communication. This is only the tip of the iceberg: Linkerd can also be used to merge ECS, Kubernetes, DC/OS, and other environments into a single logical service namespace; to implement complex traffic patterns like hybrid cloud and multi-cloud topologies; and much more. - -## Credits - -The examples and configurations in this post drew heavily from some excellent blog posts. Have a look at them for other approaches to running ECS: - -- [Linkerd: A service mesh for AWS ECS](https://medium.com/attest-engineering/linkerd-a-service-mesh-for-aws-ecs-937f201f847a) by Dario Simonetti -- [Running Linkerd in a docker container on AWS ECS](https://kevinholditch.co.uk/2017/06/28/running-linkerd-in-a-docker-container-on-aws-ecs/) by Kevin Holditch -- [Deploying Consul With ECS](https://blog.unif.io/deploying-consul-with-ecs-2c4ca7ab2981) by Wilson Carey - -## Further reading - -There’s a lot more that you can do with Linkerd. For more details about this setup, see [Getting Started: Running in ECS](https://linkerd.io/getting-started/ecs/). For all commands and config files referenced in this post, see the [linkerd-examples repo](https://github.com/linkerd/linkerd-examples/tree/master/ecs). 
For more information about configuring Linkerd, see the [Linkerd Configuration](https://api.linkerd.io/latest/linkerd/index.html) page. Finally, for more information about linkerd-viz, see the [linkerd-viz Github repo](https://github.com/linkerd/linkerd-viz). - -We hope this post was useful. We’d love to get your thoughts. Please join us in the [Linkerd Support Forum](https://linkerd.buoyant.io/) and the Linkerd [Slack](https://slack.linkerd.io/) channel! diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-i-top-line-service-metrics.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-i-top-line-service-metrics.md deleted file mode 100644 index df2e6ed6aa..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-i-top-line-service-metrics.md +++ /dev/null @@ -1,178 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-i-top-line-service-metrics' -title: 'A Service Mesh for Kubernetes, Part I: Top-line service metrics' -aliases: - - /2016/10/04/a-service-mesh-for-kubernetes-part-i-top-line-service-metrics-2/ -author: 'alex' -date: Tue, 04 Oct 2016 22:42:59 +0000 -draft: false -featured: false -thumbnail: /uploads/kubernetes1_featured_Twitter_ratio.png -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -What is a service mesh, and how is it used by cloud native apps—apps designed for the cloud? In this article, we’ll show you how to use [linkerd](https://linkerd.io/) as a service mesh on Kubernetes, and how it can capture and report top-level service metrics such as success rates, request volumes, and latencies without requiring changes to application code. - -Note: This is one article in a series of articles about [linkerd](https://linkerd.io/), [Kubernetes](http://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) (this article) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -## The services must mesh - -One of the most common questions we see about linkerd is, what exactly is a *service mesh*? 
And why is a service mesh a critical component of cloud native apps, when environments like Kubernetes provide primitives like service objects and load balancers? - -In short, a service mesh is a layer that manages the communication between apps (or between parts of the same app, e.g. microservices). In traditional apps, this logic is built directly into the application itself: retries and timeouts, monitoring/visibility, tracing, service discovery, etc. are all hard-coded into each application. - -However, as application architectures become increasingly segmented into services, moving communications logic out of the application and into the underlying infrastructure becomes increasingly important. Just as applications shouldn’t be writing their own TCP stack, they also shouldn’t be managing their own load balancing logic, or their own service discovery management, or their own retry and timeout logic. (For example, see [Oliver Gould’s MesosCon talk](https://www.youtube.com/watch?v=VGAFFkn5PiE#t=23m47) for more about the difficulty of coordinating retries and timeouts across multiple services.) - -A service mesh like linkerd provides critical features to multi-service applications running at scale: - -- **Baseline resilience**: retry budgets, deadlines, circuit-breaking. -- **Top-line service metrics**: success rates, request volumes, and latencies. -- **Latency and failure tolerance**: Failure- and latency-aware load balancing that can route around slow or broken service instances. -- **Distributed tracing** a la [Zipkin](https://github.com/openzipkin/zipkin) and [OpenTracing](http://opentracing.io/) -- **Service discovery**: locate destination instances. -- **Protocol upgrades**: wrapping cross-network communication in TLS, or converting HTTP/1.1 to HTTP/2.0. -- **Routing**: route requests between different versions of services, failover between clusters, etc. - -In this article, we’re going to focus just on visibility: how a service mesh can automatically capture and report top-line metrics, such as success rate, for services. We’ll walk you through a quick example in Kubernetes. - -## Using linkerd for service monitoring in Kubernetes - -One of the advantages of operating at the request layer is that the service mesh has access to protocol-level semantics of success and failure. For example, if you’re running an HTTP service, linkerd can understand the semantics of 200 versus 400 versus 500 responses and can calculate metrics like success rate automatically. (Operating at this layer becomes doubly important when we talk about retries—more on that in later articles.) - -Let’s walk through a quick example of how to install linkerd on Kubernetes to automatically capture aggregated, top-line service success rates without requiring application changes. - -### STEP 1: INSTALL LINKERD - -Install linkerd using [this Kubernetes config](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd.yml). 
This will install linkerd as a DaemonSet (i.e., one instance per host) running in the default Kubernetes namespace: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd.yml -``` - -You can confirm that installation was successful by viewing linkerd’s admin page: - -```bash -INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$INGRESS_LB:9990 # on OS X -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -HOST_IP=$(kubectl get po -l app=l5d -o jsonpath="{.items[0].status.hostIP}") -open http://$HOST_IP:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[2].nodePort}') # on OS X -``` - -{{< fig - alt="request path diagram" - title="Request paths" - src="/uploads/2017/07/buoyant-k8s-linkerd-admin-large-1024x737.png" >}} - -### STEP 2: INSTALL THE SAMPLE APPS - -Install two services, “hello” and “world”, in the default namespace. These apps rely on the nodeName supplied by the [Kubernetes downward API](https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/) to find Linkerd. To check if your cluster supports nodeName, you can run this test job: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/node-name-test.yml -``` - -And then looks at its logs: - -```bash -kubectl logs node-name-test -``` - -If you see an ip, great! Go ahead and deploy the hello world app using: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml -``` - -If instead you see a “server can’t find …” error, deploy the hello-world legacy version that relies on hostIP instead of nodeName: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-legacy.yml -``` - -These two services--"hello" and "world"--function together to make a highly scalable, “hello world” microservice (where the hello service, naturally, calls the world service to complete its request). You can see this in action by sending traffic through linkerd’s external IP: - -```bash -http_proxy=$INGRESS_LB:4140 -curl -s http://hello -``` - -Or to use hostIP directly: - -```bash -http_proxy=$HOST_IP:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[0].nodePort}') -curl -s http://hello -``` - -You should see the string “Hello world”. - -### STEP 3: INSTALL LINKERD-VIZ - -Finally, let’s take a look at what our services are doing by installing [linkerd-viz](https://github.com/linkerd/linkerd-viz). linkerd-viz is a supplemental package that includes a simple Prometheus and Grafana setup and is configured to automatically find linkerd instances. - -Install linkerd-viz using [this linkerd-viz config](https://raw.githubusercontent.com/linkerd/linkerd-viz/main/k8s/linkerd-viz.yml). 
This will install linkerd-viz into the default namespace: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-viz/main/k8s/linkerd-viz.yml -``` - -Open linkerd-viz’s external IP to view the dashboard: - -```bash -VIZ_INGRESS_LB=$(kubectl get svc linkerd-viz -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$VIZ_INGRESS_LB # on OS X -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -VIZ_HOST_IP=$(kubectl get po -l name=linkerd-viz -o jsonpath="{.items[0].status.hostIP}") -open http://$VIZ_HOST_IP:$(kubectl get svc linkerd-viz -o 'jsonpath={.spec.ports[0].nodePort}') # on OS X -``` - -You should see a dashboard, including selectors by service and instance. All charts respond to these service and instance selectors: - -{{< fig - alt="visualization" - title="Linkerd Visualization" - src="/uploads/2017/07/buoyant-k8s-linkerd-viz-large-1024x739.png" >}} - -The linkerd-viz dashboard includes three sections: - -- **TOP LINE**: Cluster-wide success rate and request volume. -- **SERVICE METRICS**: One section for each application deployed. Includes success rate, request volume, and latency. -- **PER-INSTANCE METRICS**: Success rate, request volume, and latency for each node in your cluster. - -## That’s all - -With just three simple commands we were able to install linkerd on our Kubernetes cluster, install an app, and use linkerd to gain visibility into the health of the app’s services. Of course, linkerd is providing much more than visibility: under the hood, we’ve enabled latency-aware load balancing, automatic retries and circuit breaking, distributed tracing, and more. In upcoming posts in this series, we’ll walk through how to take advantage of all these features. - -In the meantime, for more details about running linkerd in Kubernetes, visit the [Kubernetes Getting Started Guide](https://linkerd.io/getting-started/k8s/) or hop in the [linkerd slack](http://slack.linkerd.io/) and say hi! - -Stay tuned for Part II in this series: [Pods Are Great Until They’re Not][part-ii]. 
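One last tip before moving on: linkerd’s admin port also exposes raw counters that you can read without the dashboard. A minimal sketch, reusing the `INGRESS_LB` variable and admin port 9990 from the steps above (the `/admin/metrics.json` path and the exact metric names may vary by linkerd version, so treat this as illustrative):

```bash
# Send a little traffic through the mesh, then dump request-related counters
# from linkerd's admin metrics endpoint (one metric per line via tr).
for i in {1..20}; do http_proxy=$INGRESS_LB:4140 curl -s http://hello > /dev/null; done
curl -s http://$INGRESS_LB:9990/admin/metrics.json | tr ',' '\n' | grep -i request
```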
- -[part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not.md deleted file mode 100644 index e9e9f9a490..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not' -title: "A Service Mesh for Kubernetes, Part II: Pods are great until they're not" -aliases: - - /2016/10/14/a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not-2/ - - /2016/10/14/a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not-3/ - - /2016/10/14/a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not-4/ - - /2016/06/17/squeezing-blood-from-a-stone-small-memory-jvm-techniques-for-microservice-sidecars/ -author: 'alex' -thumbnail: /uploads/kubernetes2_featured_Twitter_ratio.png -date: Fri, 14 Oct 2016 22:55:04 +0000 -draft: false -featured: false -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -In our recent post about linkerd on Kubernetes, [A Service Mesh for Kubernetes, Part I: Top-line Service Metrics][part-i], observant readers noticed that linkerd was installed using DaemonSets rather than as a sidecar process. In this post, we’ll explain why (and how!) we do this. - -Note: This is one article in a series of articles about [linkerd](https://linkerd.io/), [Kubernetes](http://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) (this article) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -As a service mesh, linkerd is designed to be run alongside application code, managing and monitoring inter-service communication, including performing service discovery, retries, load-balancing, and protocol upgrades. - -At a first glance, this sounds like a perfect fit for a sidecar deployment in Kubernetes. After all, one of Kubernetes’s defining characteristics is its pod model. 
Deploying as a sidecar is conceptually simple, has clear failure semantics, and we’ve spent a lot of time [optimizing linkerd for this use case][small-memory]. - -However, the sidecar model also has a downside: deploying per pod means that resource costs scale per pod. If your services are lightweight and you run many instances, like [Monzo](https://monzo.com/) (who [built an entire bank on top of linkerd and Kubernetes](https://monzo.com/blog/2016/09/19/building-a-modern-bank-backend/)), then the cost of using sidecars can be quite high. - -We can reduce this resource cost by deploying linkerd per host rather than per pod. This allows resource consumption to scale per host, which is typically a significantly slower-growing metric than pod count. And, happily, Kubernetes provides [DaemonSets](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) for this very purpose. - -Unfortunately, for linkerd, deploying per host is a bit more complicated than just using DaemonSets. Read on for how we solve the service mesh problem with per-host deployments in Kubernetes. - -## A service mesh for Kubernetes - -One of the defining characteristics of a service mesh is its ability to decouple application communication from transport communication. For example, if services A and B speak HTTP, the service mesh may convert that to HTTPS across the wire, without the application being aware. The service mesh may also be doing connection pooling, admission control, or other transport-layer features, also in a way that’s transparent to the application. - -In order to fully accomplish this, linkerd must be on the sending side and the receiving side of each request, proxying to and from local instances. E.g. for HTTP to HTTPS upgrades, linkerd must be able to both initiate and terminate TLS. In a DaemonSet world, a request path through linkerd looks like the diagram below: - -{{< fig - alt="request path diagram" - title="request path diagram" - src="/uploads/2017/07/buoyant-k8s-daemonset-mesh.png" >}} - -As you can see, a request that starts in Pod A on Host 1 and is destined for Pod J on Host 2 must go through Pod A’s *host-local* linkerd instance, then to Host 2’s linkerd instance, and finally to Pod J. This path introduces three problems that linkerd must address: - -- How does an application identify its *host-local* linkerd? -- How does linkerd route an outgoing request to the destination’s linkerd? -- How does linkerd route an incoming request to the destination application? - -What follows are the technical details on how we solve these three problems. If you just want to get linkerd working with Kubernetes DaemonSets, see the [previous blog post][part-i]! - -## HOW DOES AN APPLICATION IDENTIFY ITS HOST-LOCAL LINKERD? - -Since DaemonSets use a Kubernetes `hostPort`, we know that linkerd is running on a fixed port on the host’s IP. Thus, in order to send a request to the linkerd process on the same machine that it’s running on, we need to determine the IP address of its host. - -In Kubernetes 1.4 and later, this information is directly available through the Downward API. 
Here is an excerpt from [hello-world.yml](https://github.com/linkerd/linkerd-examples/blob/master/k8s-daemonset/k8s/hello-world.yml) that shows how the node name can be passed into the application: - -```yml -env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: http_proxy - value: $(NODE_NAME):4140 -args: - - '-addr=:7777' - - '-text=Hello' - - '-target=world' -``` - -(Note that this example sets the `http_proxy` environment variable to direct all HTTP calls through the *host-local* linkerd instance. While this approach works with most HTTP applications, non-HTTP applications will need to do something different.) - -In Kubernetes releases prior to 1.4, this information is still available, but in a less direct way. We provide a [simple script](https://github.com/linkerd/linkerd-examples/blob/master/docker/helloworld/hostIP.sh) that queries the Kubernetes API to get the host IP; the output of this script can be consumed by the application, or used to build an `http_proxy` environment variable as in the example above. - -Here is an excerpt from [hello-world-legacy.yml](https://github.com/linkerd/linkerd-examples/blob/master/k8s-daemonset/k8s/hello-world-legacy.yml) that shows how the host IP can be passed into the application: - -```yml -env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: NS - valueFrom: - fieldRef: - fieldPath: metadata.namespace -command: - - '/bin/sh' - - '-c' - - 'http_proxy=`hostIP.sh`:4140 helloworld -addr=:7777 -text=Hello -target=world' -``` - -Note that the `hostIP.sh` script requires that the pod’s name and namespace be set as environment variables in the pod. - -## HOW DOES LINKERD ROUTE AN OUTGOING REQUEST TO THE DESTINATION’S LINKERD? - -In our service mesh deployment, outgoing requests should not be sent directly to the destination application, but instead should be sent to the linkerd running on that application’s host. To do this, we can take advantage of a powerful new feature introduced in [linkerd 0.8.0](https://github.com/linkerd/linkerd/releases/tag/0.8.0) called transformers, which can do arbitrary post-processing on the destination addresses that linkerd routes to. In this case, we can use the DaemonSet transformer to automatically replace destination addresses with the address of a DaemonSet pod running on the destination’s host. For example, this outgoing router linkerd config sends all requests to the incoming port of the linkerd running on the same host as the destination app: - -```yml -routers: -- protocol: http - label: outgoing - interpreter: - kind: default - transformers: - - kind: io.l5d.k8s.daemonset - namespace: default - port: incoming - service: l5d - ... -``` - -## HOW DOES LINKERD ROUTE AN INCOMING REQUEST TO THE DESTINATION APPLICATION? - -When a request finally arrives at the destination pod’s linkerd instance, it must be correctly routed to the pod itself. To do this, we use the `localnode` transformer to limit routing to only pods running on the current host. Example linkerd config: - -```yml -routers: -- protocol: http - label: incoming - interpreter: - kind: default - transformers: - - kind: io.l5d.k8s.localnode - ... -``` - -## Conclusion - -Deploying linkerd as a Kubernetes DaemonSet gives us the best of both worlds—it allows us to accomplish the full set of goals of a service mesh (such as transparent TLS, protocol upgrades, latency-aware load balancing, etc.), while scaling linkerd instances per host rather than per pod.
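Putting those two pieces together, the heart of the DaemonSet-style service mesh config is just a pair of routers. The sketch below is a simplified illustration rather than the full linkerd.yml: the `servers` stanzas and the 4140/4141 port split follow the conventions used elsewhere in this series, and the namers, dtab, and admin sections are omitted.

```yml
routers:
# Outgoing router: pods send requests here (e.g. via http_proxy), and the
# daemonset transformer rewrites each destination to the linkerd instance
# running on the destination pod's host.
- protocol: http
  label: outgoing
  servers:
  - ip: 0.0.0.0
    port: 4140
  interpreter:
    kind: default
    transformers:
    - kind: io.l5d.k8s.daemonset
      namespace: default
      port: incoming
      service: l5d
# Incoming router: other linkerd instances send requests here, and the
# localnode transformer restricts routing to pods on this host.
- protocol: http
  label: incoming
  servers:
  - ip: 0.0.0.0
    port: 4141
  interpreter:
    kind: default
    transformers:
    - kind: io.l5d.k8s.localnode
```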
- -For a full, working example, see the [previous blog post][part-i], or download our [example app](https://github.com/linkerd/linkerd-examples/tree/master/k8s-daemonset). And for help with this configuration or anything else about linkerd, feel free to drop into our very active [Slack](http://slack.linkerd.io/?__hstc=9342122.76ce13dbfb256ee6981b45631b434a7a.1497486135169.1498849007669.1499118552444.5&__hssc=9342122.14.1499118552444&__hsfp=188505984) or post a topic on the [Linkerd Support Forum](https://linkerd.buoyant.io/). - -## Acknowledgments - -Special thanks to [Oliver Beattie](https://twitter.com/obeattie) and [Oleksandr Berezianskyi](https://github.com/OleksandrBerezianskyi) for their pioneering work on running linkerd as a DaemonSet, and to [Joonas Bergius](https://twitter.com/joonas) for contributing the Kubernetes 1.4 configuration. - -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [small-memory]: {{< ref "small-memory-jvm-techniques-for-microservice-sidecars" >}} diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things.md deleted file mode 100644 index 5329416548..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things.md +++ /dev/null @@ -1,179 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things' -title: 'A Service Mesh for Kubernetes, Part III: Encrypting all the things' -author: 'alex' -date: Mon, 24 Oct 2016 23:00:15 +0000 -draft: false -featured: false -thumbnail: /uploads/kubernetes3_featured_Twitter_ratio.png -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -In this article, we’ll show you how to use linkerd as a service mesh to add TLS to all service-to-service HTTP calls, without modifying any application code. - -Note: This is one article in a series of articles about [linkerd](https://linkerd.io/), [Kubernetes](http://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) (this article) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. 
Autoscaling by top-line metrics - -In the first installment in this series, we showed you how you can [easily monitor top-line service metrics][part-i] \(success rates, latencies, and request rates\) when linkerd is installed as a service mesh. In this article, we’ll show you another benefit of the service mesh approach: it allows you to decouple the application’s protocol from the protocol used on the wire. In other words, the application can speak one protocol, but the bytes that actually go out on the wire are in another. - -In the case where no data transformation is required, linkerd can use this decoupling to automatically do protocol upgrades. Examples of the sorts of protocol upgrades that linkerd can do include HTTP/1.x to HTTP/2, thrift to [thrift-mux](http://twitter.github.io/finagle/guide/Protocols.html#mux), and, the topic of this article, HTTP to HTTPS. - -## A Service Mesh for Kubernetes - -When linkerd is deployed as a service mesh on Kubernetes, we [place a linkerd instance on every host using DaemonSets][part-ii]. For HTTP services, pods can send HTTP traffic to their host-local linkerd by using the `http_proxy` environment variable. (For non-HTTP traffic the integration is slightly more complex.) - -In our blog post from a few months ago, we showed you the basic pattern of [using linkerd to “wrap” HTTP calls in TLS]({{< relref "transparent-tls-with-linkerd" >}}) by proxying at both ends of the connection, both originating and terminating TLS. However, now that we have the service mesh deployment in place, things are significantly simpler. Encrypting all cross-host communication is largely a matter of providing a TLS certificate to the service mesh. - -Let’s walk through an example. The first two steps will be identical to what we did in [Part I of this series][part-i]—we’ll install linkerd as a service mesh and install a simple microservice “hello world” application. If you have already done this, you can skip straight to [step 3][part-iii]. - -## STEP 1: INSTALL LINKERD - -We can install linkerd as a service mesh on our Kubernetes cluster by using [this Kubernetes config](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd.yml). This will install linkerd as a DaemonSet (i.e., one instance per host) in the default Kubernetes namespace: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd.yml -``` - -You can confirm that installation was successful by viewing linkerd’s admin page (note that it may take a few minutes for the ingress IP to become available): - -```bash -INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$INGRESS_LB:9990 # on OS X -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -HOST_IP=$(kubectl get po -l app=l5d -o jsonpath="{.items[0].status.hostIP}") -open http://$HOST_IP:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[2].nodePort}') # on OS X -``` - -## STEP 2: INSTALL THE SAMPLE APPS - -Install two services, “hello” and “world”, in the default namespace. These apps rely on the nodeName supplied by the [Kubernetes downward API](https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/) to find Linkerd. 
To check if your cluster supports nodeName, you can run this test job: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/node-name-test.yml -``` - -And then looks at its logs: - -```bash -kubectl logs node-name-test -``` - -If you see an ip, great! Go ahead and deploy the hello world app using: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml -``` - -If instead you see a “server can’t find …” error, deploy the hello-world legacy version that relies on hostIP instead of nodeName: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-legacy.yml -``` - -These two services function together to make a highly scalable, “hello world” microservice (where the hello service, naturally, must call the world service to complete its request). - -At this point, we actually have a functioning service mesh and an application that makes use of it. You can see the entire setup in action by sending traffic through linkerd’s external IP: - -```bash -http_proxy=$INGRESS_LB:4140 -curl -s http://hello -``` - -Or to use hostIP directly: - -```bash -http_proxy=$HOST_IP:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[0].nodePort}') -curl -s http://hello -``` - -If everything’s working, you should see the string “Hello world”. - -## STEP 3: CONFIGURE LINKERD TO USE TLS - -Now that linkerd is installed, let’s use it to encrypt traffic. We’ll place TLS certificates on each of the hosts, and configure linkerd to use those certificates for TLS. - -We’ll use a global certificate (the mesh certificate) that we generate ourselves. Since this certificate is not tied to a public DNS name, we don’t need to use a service like [Let’s Encrypt](https://letsencrypt.org/). We can instead generate our own CA certificate and use that to sign our mesh certificate (“self-signing”). We’ll distribute three things to each Kubernetes host: the CA certificate, the mesh key, and the mesh certificate. - -The following scripts use sample certificates that we’ve generated. *Please don’t use these certificates in production*. For instructions on how to generate your own self-signed certificates, see our previous post, where we have [instructions on how to generate your own certificates]({{< -relref "transparent-tls-with-linkerd" >}}#generating-certificates)). - -## STEP 4: DEPLOY CERTIFICATES AND CONFIG CHANGES TO KUBERNETES - -We’re ready to update linkerd to encrypt traffic. We will distribute the [sample certificates](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/certificates.yml) as Kubernetes [secrets](https://kubernetes.io/docs/concepts/configuration/secret/). - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/certificates.yml -``` - -Now we will configure linkerd to use these certificates by giving it [this configuration](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-tls.yml) and restarting it: - -```bash -kubectl delete ds/l5d configmap/l5d-config -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-tls.yml -``` - -## STEP 5: SUCCESS! - -At this point, linkerd should be transparently wrapping all communication between these services in TLS. 
Let’s verify this by running the same command as before: - -```bash -http_proxy=$INGRESS_LB:4140 curl -s http://hello -``` - -Or using hostIP: - -```bash -http_proxy=$HOST_IP:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[0].nodePort}') curl -s http://hello -``` - -If all is well, you should still see the string “Hello world”—but under the hood, communication between the hello and world services is being encrypted. We can verify this by making an HTTPS request directly to port 4141, where linkerd is listening for requests from other linkerd instances: - -```bash -curl -skH 'l5d-dtab: /svc=>/#/io.l5d.k8s/default/admin/l5d;' https://$INGRESS_LB:4141/admin/ping -``` - -Or using hostIP: - -```bash -curl -skH 'l5d-dtab: /svc=>/#/io.l5d.k8s/default/admin/l5d;' \ -https://$HOST_IP:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[1].nodePort}')/admin/ping -``` - -Here we’re asking curl to make an HTTPS call, and telling it to skip TLS validation (since curl is expecting a website, not linkerd). We’re also adding a [dtab override](https://linkerd.io/features/routing/#per-request-routing) to route the request to the linkerd instance’s own admin interface. If all is well, you should again see a successful “pong” response. Congratulations! You’ve encrypted your cross-service traffic. - -## Conclusion - -In this post, we’ve shown how a service mesh like linkerd can be used to transparently encrypt all cross-node communication in a Kubernetes cluster. We’re also using TLS to ensure that linkerd instances can verify that they’re talking to other linkerd instances, preventing man-in-the-middle attacks (and misconfiguration!). Of course, the application remains blissfully unaware of any of these changes. - -TLS is a complex topic and we’ve glossed over some important security considerations for the purposes of making the demo easy and quick. Please make sure you spend time to fully understand the steps involved before you try this on your production cluster. - -Finally, adding TLS to the communications substrate is just one of many things that can be accomplished with a service mesh. Be sure to check out the rest of the articles in this series for more! - -For help with this or anything else about linkerd, feel free to stop by our [linkerd community Slack](http://slack.linkerd.io/), post a topic on the [Linkerd Support Forum](https://linkerd.buoyant.io/), or [contact us directly](https://linkerd.io/overview/help/)!
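One practical note to close on: the sample certificates.yml above is only for demos, and when you generate your own CA and mesh certificates you’ll need to get them onto the cluster yourself. A rough sketch of that step, assuming files named ca.pem, cert.pem, and key.pem (the names are illustrative and must match whatever paths your linkerd TLS configuration references):

```bash
# Package the CA certificate and the mesh certificate/key pair as a Kubernetes
# secret, so the linkerd DaemonSet can mount them on every host.
kubectl create secret generic certificates \
  --from-file=ca.pem \
  --from-file=cert.pem \
  --from-file=key.pem
```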
- -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting.md deleted file mode 100644 index 59f2fdac15..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting.md +++ /dev/null @@ -1,404 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting' -title: 'A Service Mesh for Kubernetes, Part IV: Continuous deployment via traffic shifting' -aliases: - - /2016/11/04/a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting/ -author: 'sarah' -date: Fri, 04 Nov 2016 23:02:03 +0000 -thumbnail: /uploads/kubernetes4_featured_Twitter_ratio.png -draft: false -featured: false -tags: [Linkerd, linkerd, News, tutorials] ---- - -**Updated (01-05-2018):** There is a new namerd.yml that now uses the CustomResourceDefinition API that was recently added in Kubernetes 1.8. - -Beyond service discovery, top-line metrics, and TLS, Linkerd also has a powerful routing language, called *dtabs*, that can be used to alter the ways that requests—even individual requests—flow through the application topology. In this article, we’ll show you how to use Linkerd as a service mesh to do blue-green deployments of new code as the final step of a CI/CD pipeline. - -Note: this post was co-written with [Kevin Lingerfelt](https://twitter.com/klingerf). This is one article in a series of articles about [Linkerd](https://linkerd.io/), [Kubernetes](http://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) (this article) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. 
[Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -In previous installments of this series, we’ve shown you how you can use a service mesh like Linkerd to [capture top-line service metrics][part-i] and [transparently add TLS][part-iii] to your application, without changing application code. - -In this article, we’ll show you an example of how to use Linkerd’s routing rules, called [dtabs](https://linkerd.io/in-depth/dtabs/), to automatically alter traffic flow through your application at the end of a CI/CD pipeline to perform a [blue-green deployment](http://martinfowler.com/bliki/BlueGreenDeployment.html) between old and new versions of a service. - -Continuous deployment (CD) is an extension of continuous integration (CI), in which code is pushed to production on a continuous basis, tightly coupled to the development process. While it requires powerful automation, minimizing the time between development and deployment allows companies to iterate very rapidly on their product. - -For multi-service or microservice architectures, the final step of the CD process, the deployment itself, can be risky because so much runtime behavior is determined by the runtime environment, including the other services that are handling production traffic. In these situations, gradual rollouts such as blue-green deployments become increasingly important. - -Coordinating traffic shifting across multiple Linkerds requires a centralized traffic control tool. For this we recommend [namerd](https://linkerd.io/in-depth/namerd), a service with an API that serves routing rules backed by a consistent store. You can read more about how namerd integrates with production systems in our previous blog post covering [routing in linkerd](/2016/05/04/real-world-microservices-when-services-stop-playing-well-and-start-getting-real/#routing-in-linkerd). - -We’ll demonstrate a blue-green deployment using an example app from the [linkerd-examples](https://github.com/linkerd/linkerd-examples/tree/master/docker/helloworld) Github repo. The example app is a contrived “hello world” microservice application, consisting a of “hello” service that handles incoming requests and calls a “world” service before returning a response. With Jenkins as our automation server, we’ll deploy a new version of the world service using the [Jenkins Pipeline Plugin](https://github.com/jenkinsci/pipeline-plugin). - -## A Service Mesh for Kubernetes - -Before we start continuously deploying, we’ll need to initially deploy the hello world app to Kubernetes, routing requests using Linkerd and namerd. We can do this easily by using the [Kubernetes configs](https://github.com/linkerd/linkerd-examples/tree/master/k8s-daemonset/k8s) in the linkerd-examples repo. - -## STEP 1: INSTALL NAMERD - -We’ll start by installing namerd, which will manage the dtabs that we use to orchestrate our blue-green deployments. 
Please note that our namerd configuration uses the [CustomResourceDefinition APIs](https://kubernetes.io/docs/tasks/access-kubernetes-api/extend-api-custom-resource-definitions/), which requires a cluster running Kubernetes 1.8+. - -Note: Prior to Kubernetes 1.8, Linkerd used the ThirdPartyResources API to store dtabs. The API has been deprecated in favor of the new CustomResourceDefinitions API. - -To install namerd in the default Kubernetes namespace, run (Kubernetes 1.8+): - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/namerd.yml -``` - -If you are using a Kubernetes version older than 1.8, run: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/namerd-legacy.yml -``` - -You can confirm that installation was successful by viewing namerd’s admin page (note that it may take a few minutes for the ingress IP to become available): - -```bash -NAMERD_INGRESS_LB=$(kubectl get svc namerd -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$NAMERD_INGRESS_LB:9991 # on OS X -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -NAMERD_HOST_IP=$(kubectl get po -l app=namerd -o 'jsonpath={.items[0].status.hostIP}') -open http://$NAMERD_HOST_IP:$(kubectl get svc namerd -o 'jsonpath={.spec.ports[2].nodePort}') # on OS X -``` - -The admin page displays all configured namerd namespaces, and we’ve configured two namespaces—“external” and “internal”. For the sake of continuous deployment, we’re mostly concerned with the “internal” namespace. - -In addition to the admin UI, we can also use the [namerctl](https://github.com/linkerd/namerctl) utility to talk directly to namerd. This utility will be used by the deploy script to start sending traffic to newly deployed services. To install it locally, run: - -```bash -go get -u github.com/linkerd/namerctl && go install github.com/linkerd/namerctl -``` - -The utility uses the `NAMERCTL_BASE_URL` environment variable to connect to namerd. In order to connect to the version of namerd that we just deployed to Kubernetes, set the variable as follows: - -```bash -export NAMERCTL_BASE_URL=http://$NAMERD_INGRESS_LB:4180 -``` - -Or to use hostIP directly: - -```bash -export NAMERCTL_BASE_URL=http://$NAMERD_HOST_IP:$(kubectl get svc namerd -o 'jsonpath={.spec.ports[1].nodePort}') -``` - -And now try using `namerctl` to display the internal dtab: - -```bash -$ namerctl dtab get internal -# version MjgzNjk5NzI= -/srv => /#/io.l5d.k8s/default/http ; -/host => /srv ; -/tmp => /srv ; -/svc => /host ; -/host/world => /srv/world-v1 ; -``` - -The last line of the dtab maps the logical name of the `world` service to the currently deployed version of the world service, `world-v1`. In a production system, versions could be SHAs, dates, or anything else that guarantees name uniqueness. We’ll use this dtab entry to safely introduce new versions of the world service into production. - -## STEP 2: INSTALL LINKERD - -Next we’ll install Linkerd and configure it to resolve routes using namerd.
To install Linkerd as a DaemonSet (i.e., one instance per host) in the default Kubernetes namespace, run: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-namerd.yml -``` - -You can confirm that installation was successful by viewing Linkerd’s admin UI (note that it may take a few minutes for the ingress IP to become available): - -```bash -L5D_INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$L5D_INGRESS_LB:9990 # on OS X -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -L5D_HOST_IP=$(kubectl get po -l app=l5d -o 'jsonpath={.items[0].status.hostIP}') -open http://$L5D_HOST_IP:\$(kubectl get svc l5d -o 'jsonpath={.spec.ports[3].nodePort}') # on OS X -``` - -We’ll use the admin UI to verify steps of the blue-green deploy. - -## STEP 3: INSTALL THE SAMPLE APPS - -Now we’ll install the hello and world apps in the default namespace. These apps rely on the nodeName supplied by the [Kubernetes downward API](https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/) to find Linkerd. To check if your cluster supports nodeName, you can run this test job: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/node-name-test.yml -``` - -And then looks at its logs: - -```bash -kubectl logs node-name-test -``` - -If you see an ip, great! Go ahead and deploy the hello world app using: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml -``` - -If instead you see a "server can't find ..." error, deploy the hello-world legacy version that relies on hostIP instead of nodeName: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-legacy.yml -``` - -At this point, we actually have a functioning service mesh and an application that makes use of it. You can see the entire setup in action by sending traffic through linkerd’s external IP: - -```bash -curl $L5D_INGRESS_LB Hello (10.196.2.5) world (10.196.2.6)!! -``` - -Or to use hostIP directly: - - -```bash -$ L5D_INGRESS_LB=$L5D_HOST_IP:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[0].nodePort}') -$ curl $L5D_INGRESS_LB -Hello (10.196.2.5) world (10.196.2.6)!! -``` - - -If everything is working, you’ll see a “Hello world” message similar to that above, with the IPs of the pods that served the request. - -## Continuous deployment - -We’ll now use Jenkins to perform blue-green deploys of the “world” service that we deployed in the previous step. - -### SETUP JENKINS - -Let’s start by deploying the [buoyantio/jenkins-plus](https://hub.docker.com/r/buoyantio/jenkins-plus/) Docker image to our Kubernetes cluster. This image provides the base `jenkins` image, along with the `kubectl` and `namerctl` binaries that we need, as well as additional plugins and a pre-configured pipeline job that we can use to run deployments. The pipeline job makes use of the [Jenkins Pipeline Plugin](https://github.com/jenkinsci/pipeline-plugin) and a [custom Groovy script](https://gist.github.com/klingerf/14a78b3408eab0327b0de483dc174fbb) that handles each of the steps in the blue-green deploy for us. 
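Since the pipeline leans on those bundled binaries, it can be reassuring to confirm they are present once the pod from the next command is up. An optional sanity check, assuming the `app=jenkins` label used later in this post:

```bash
# Confirm the jenkins-plus image ships the kubectl and namerctl binaries
# that the deploy pipeline relies on.
JENKINS_POD=$(kubectl get po -l app=jenkins -o jsonpath="{.items[0].metadata.name}")
kubectl exec $JENKINS_POD -- sh -c 'which kubectl && which namerctl'
```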
- -To deploy the Jenkins image to the default Kubernetes namespace, run: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/jenkins.yml -``` - -You can confirm that installation was successful by opening up the Jenkins web UI (note that it may take a few minutes for the ingress IP to become available): - -```bash -JENKINS_LB=$(kubectl get svc jenkins -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$JENKINS_LB # on OS X -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -JENKINS_HOST_IP=$(kubectl get po -l app=jenkins -o 'jsonpath={.items[0].status.hostIP}') -open http://$JENKINS_HOST_IP:$(kubectl get svc jenkins -o 'jsonpath={.spec.ports[0].nodePort}') # on OS X -``` - -You should see a “hello_world” job in the UI. - -### COMMITTING CODE - -Now it’s time to make some code changes to the world service, and have the Jenkins job deploy them to production for us. To do this, start by forking the [linkerd-examples](https://github.com/linkerd/linkerd-examples) repo in the GitHub UI. Once you’ve created a fork, clone your fork locally: - -```bash -git clone https://github.com/esbie/linkerd-examples.git && cd linkerd-examples -``` - -For the sake of this example, we’re going to change a text file that controls the output of the world service. By default, the world service outputs the string “world”: - -```bash -cat k8s-daemonset/helloworld/world.txt   # prints: world -``` - -Let’s spice that up a bit: - -```bash -echo "hal, open the pod bay doors" > k8s-daemonset/helloworld/world.txt -``` - -And commit it: - -```bash -git commit -am "Improve the output of the world service" && git push origin master -``` - -Now it’s time to get this critical change into production. - -### RUNNING THE JOB - -With our change committed and pushed to our fork of the `linkerd-examples` repo, we can kick off the Jenkins “hello_world” pipeline job to safely deploy the change into production. Each of the 6 steps in the pipeline job is controlled by a [custom Groovy script](https://gist.github.com/klingerf/14a78b3408eab0327b0de483dc174fbb) and described below in more detail. The deploy is fully automated, with the exception of three places in the pipeline where it pauses for human-in-the-loop verification of critical metrics before proceeding. - -#### BUILD WITH PARAMETERS - -To start the deploy, click into the “hello_world” job in the Jenkins UI, and then click “Build with Parameters” in the sidebar. You’ll be taken to a page that lets you customize the deploy, and it will look something like this: - -{{< fig - alt="pipeline hello world" - title="pipeline hello world" - src="/uploads/2017/07/buoyant-pipeline-build-parameters.png" >}} - -Change the value of the `gitRepo` form field to point to your fork of the `linkerd-examples` repo, and then click the “Build” button. Note that if you pushed your changes to a separate branch in your fork, you should also change the value of the `gitBranch` form field to match your branch name. - -#### CLONE - -The first step in the pipeline is to clone the git repo using the build parameters specified above. Pretty straightforward. - -#### DEPLOY - -The second step in the deploy pipeline is to actually deploy the new version of the world service to our cluster, without sending it any traffic. The script determines that the currently deployed version of the world service is `world-v1`, so it creates a new service called `world-v2` and deploys that to our Kubernetes cluster.
At this point you will see two different versions of the world service running simultaneously: - -```bash -$ kubectl get po | grep world -world-v1-9eaxk 1/1 Running 0 3h -world-v1-kj6gi 1/1 Running 0 3h -world-v1-vchal 1/1 Running 0 3h -world-v2-65y9g 1/1 Running 0 30m -world-v2-d260q 1/1 Running 0 30m -world-v2-z7ngo 1/1 Running 0 30m -``` - -Even with the `world-v2` version fully deployed, we still have not made any changes to production traffic! Linkerd and namerd are still configured to route all world service traffic to the existing `world-v1` version. Fully deploying a new version of the service before sending it any traffic is key to performing a blue-green deploy. - -#### INTEGRATION TESTING - -Once the new version of our service is deployed, the script performs a test request to make sure the new version can be reached. If the test request succeeds, it pauses the deploy and waits for us to acknowledge that the newly deployed version looks correct before proceeding. - -{{< fig - alt="Stage View" - title="Stage View" - src="/uploads/2017/07/buoyant-pipeline-integration-testing.png" >}} - -At this point, we want to make sure that the new pods are running as expected—not just by themselves, but in conjunction with the rest of the production environment. Normally this would involve a deployment to a separate staging cluster, combined with some mechanism for sending or replaying production traffic to that cluster. - -Since we’re using Linkerd, we can significantly simplify this operation by taking advantage of Linkerd’s [per-request routing](https://linkerd.io/features/routing/#per-request-routing) to accomplish the same thing without a dedicated staging environment. At ingress, we can tag our request with a special header, `l5d-dtab`, that will instruct Linkerd to route this request through the production cluster, but replace all service calls to `world-v1` with calls to `world-v2` instead *for this request only*. - -The Jenkins UI provides the dtab override that we need to route requests to the new version of our service, and using that information we can make our own test request: - -```bash -$ curl -H 'l5d-dtab: /host/world => /tmp/world-v2' $L5D_INGRESS_LB -Hello (10.196.2.5) hal, open the pod bay doors (10.196.1.17)!! -``` - -Success! Our request is being routed to the `world-v2` service, which is returning the new world text that we added on our branch. Even though we can reach the new service, it’s worth noting that we *still* have not changed the behavior of any production traffic, aside from the request that we just made. We can verify that by omitting the `l5d-dtab` header and ensuring that we still get the `world-v1` response: - -```bash -$ curl $L5D_INGRESS_LB -Hello (10.196.2.5) world (10.196.2.6)!! -``` - -If everything looks good, we can proceed to the next step in the pipeline by clicking the “Ok, I’m done with manual testing” button in the Jenkins UI. - -#### SHIFT TRAFFIC (10%) - -After some manual testing, we’re ready to start the blue-green deployment by sending 10% of production traffic to the newly deployed version of the service. The script makes the change in routing policy and again pauses, asking us to confirm that everything looks OK with 10% traffic before proceeding. 
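Under the hood, this routing change is just another dtab update in namerd. A hand-rolled sketch of the same 90/10 split, using a weighted dtab union; the file names are made up, the exact namerctl invocation may differ, and in practice the pipeline’s Groovy script performs this step for you:

```bash
# Fetch the current internal dtab, rewrite the world rule as a 90/10 weighted
# union across world-v1 and world-v2, and push the result back to namerd.
namerctl dtab get internal > dtab.current
sed 's|/host/world => /srv/world-v1 ;|/host/world => 9 * /srv/world-v1 \& 1 * /srv/world-v2 ;|' \
  dtab.current > dtab.canary
namerctl dtab update internal dtab.canary
```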
- -{{< fig - alt="Stage View" - title="Stage View" - src="/uploads/2017/07/buoyant-pipeline-shift-traffic-10.png" >}} - -Note that if the user aborts on any pipeline step, the script assumes there was something wrong with the new service, and automatically reverts the routing change, sending all traffic back to the original service. Since we’re not tearing down instances of the old version of the service while shifting traffic, reverting traffic back can happen quickly, minimizing the impact of a bad deploy. - -We can verify that our service is taking 10% of requests by sending it 10 requests and hoping that the odds are in our favor: - -```bash -$ for i in {1..10}; do curl $L5D_INGRESS_LB; echo ""; done -Hello (10.196.2.5) world (10.196.1.16)!! -Hello (10.196.2.5) world (10.196.1.16)!! -Hello (10.196.2.5) hal, open the pod bay doors (10.196.2.13)!! -Hello (10.196.2.5) world (10.196.2.6)!! -Hello (10.196.1.13) world (10.196.2.6)!! -Hello (10.196.1.13) world (10.196.2.6)!! -Hello (10.196.2.5) world (10.196.1.16)!! -Hello (10.196.2.5) world (10.196.2.6)!! -Hello (10.196.1.14) world (10.196.2.6)!! -Hello (10.196.1.14) world (10.196.1.16)!! -``` - -Looking good! Now is also a good time to check Linkerd’s admin dashboard, to verify that the new service is healthy. If your application were receiving a small amount of steady traffic, then the dashboard would look like this: - -{{< fig - alt="Dashboard" - title="Dashboard" - src="/uploads/2017/07/buoyant-pipeline-admin-large-1024x737.png" >}} - -We can see right away that the `world-v2` service is taking roughly 10% of traffic, with 100% success rate. If everything looks good, we can proceed to the next step by clicking the “Ok, success rates look stable” button in the Jenkins UI. - -#### SHIFT TRAFFIC (100%) - -In this step the script shifts additional traffic to the new version of our service. For a concise example, we’re moving immediately to 100% of traffic, but in a typical deployment you could include additional intermediary percentages as separate steps in the pipeline. - -{{< fig - alt="Stage View" - title="Stage View" - src="/uploads/2017/07/buoyant-pipeline-shift-traffic-100.png" >}} - -We can verify that the new service is serving traffic by sending it a request without a dtab override header: - -```bash -$ curl $L5D_INGRESS_LB -Hello (10.196.2.5) hal, open the pod bay doors (10.196.2.13)!! -``` - -Once we’re confidant that `world-v2` is successfully handling 100% of production traffic, we can proceed to the final step by clicking the “Ok, everything looks good” button in the Jenkins UI. - -#### CLEANUP - -In the final step, the script finalizes the deploy by making the routing rules to route traffic to the new version of the service permanent. It also tears down the previous version of the service that was still running in our cluster but not receiving any traffic. - -{{< fig - alt="cleanup" - title="cleanup" - src="/uploads/2017/07/buoyant-pipeline-cleanup.png" >}} - -The final version of namerd’s dtab is now: - -```bash -$ namerctl dtab get internal -# version MTIzMzU0OTE= -/srv => /#/io.l5d.k8s/default/http ; -/host => /srv ; -/tmp => /srv ; -/http/*/* => /host ; -/host/world => /srv/world-v2 ; -``` - -We can verify that the old service has been torn down by looking at the world service pods that are currently deployed to our cluster. - -```bash -$ kubectl get po | grep world -world-v2-65y9g 1/1 Running 0 1h -world-v2-d260q 1/1 Running 0 1h -world-v2-z7ngo 1/1 Running 0 1h -``` - -Everything looks good. 
Kicking off a subsequent pipeline job will deploy a `world-v3`version of the service, gradually shift traffic over, and then promote it to the current version when the deploy successfully completes. - -## Conclusion - -In this post, we’ve shown a basic workflow incorporating Linkerd, namerd, and Jenkins to progressively shift traffic from an old version to a new version of a service as the final step of a continuous deployment pipeline. We’ve shown how Linkerd’s ability to do per-request routing actually lets us stage the new version of the service without needing a separate staging cluster, by using the `l5d-dtab` header to stitch the new service into the production topology *just for that request*. Finally, we’ve shown how percentage-based traffic shifting can be combined with a Jenkins `input` step to allow for human-in-the-loop verification of metrics as traffic moves from 0% to 100%. - -This was a fairly simple example, but we hope it demonstrates the basic pattern of using service mesh routing for continuous deployment and provides a template for customizing this workflow for your own organization. For help with dtabs or anything else about Linkerd, feel free to stop by the [Linkerd Support Forum](https://linkerd.buoyant.io/), [Linkerd community Slack](http://slack.linkerd.io/), or [contact us directly](https://linkerd.io/overview/help/)! - -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit.md deleted file mode 100644 index 14bc0c1d3d..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit.md +++ /dev/null @@ -1,167 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit' -title: 'A Service Mesh For Kubernetes Part IX: gRPC for fun and profit' -aliases: - - /2017/04/19/a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit/ -author: 'risha' -date: Wed, 19 Apr 2017 23:34:57 +0000 -thumbnail: /uploads/kubernetes9_featured_Twitter_ratio.png -draft: false -featured: false -tags: [Linkerd, linkerd, News, tutorials] ---- - -As of Linkerd 0.8.5, released earlier this year, [Linkerd supports gRPC and HTTP/2]({{< ref "http2-grpc-and-linkerd" >}})! These powerful protocols can provide significant benefits to applications that make use of them. 
In this post, we’ll demonstrate how to use Linkerd with gRPC, allowing applications that speak gRPC to take full advantage of Linkerd’s load balancing, service discovery, circuit breaking, and distributed tracing logic. - -This article is one of a series of articles about [Linkerd](https://linkerd.io/), [Kubernetes](https://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) (this article) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -For this post we’ll use our familiar `hello world` microservice app and configs, which can be found in the `linkerd-examples` repo ([k8s configs here](https://github.com/BuoyantIO/linkerd-examples/tree/master/k8s-daemonset) and [`hello world` code here](https://github.com/BuoyantIO/linkerd-examples/tree/master/docker/helloworld)). - -The `hello world` application consists of two components—a `hello` service which calls a `world` service to complete a request. `hello` and `world` use gRPC to talk to each other. We’ll deploy Linkerd as a DaemonSet (so one Linkerd instance per host), and a request from `hello` to `world` will look like this: - -{{< fig - alt="DaemonSet deployment model: one Linkerd per host" - title="DaemonSet deployment model: one Linkerd per host" - src="/uploads/2017/07/buoyant-grpc-daemonset-1024x617.png" >}} - -As shown above, when the `hello` service wants to call `world`, the request goes through the *outgoing* router of its host-local Linkerd, which does not send the request directly to the destination `world` service, but to a Linkerd instance running on the same host as `world` (on its *incoming* router). That Linkerd instance then sends the request to the `world` service on its host. This three-hop model allows Linkerd to decouple the application’s protocol from the transport protocol—for example, [by wrapping cross-node connections in TLS][part-iii]. (For more on this deployment topology, see Part II of this series, [Pods are great until they’re not][part-ii].) - -## TRYING THIS AT HOME - -Let’s see this setup in action! Deploy the `hello` and `world` apps to the default k8s namespace.
These apps rely on the nodeName supplied by the [Kubernetes downward API](https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/) to find Linkerd. To check if your cluster supports nodeName, you can run this test job: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/node-name-test.yml -``` - -And then looks at its logs: - -```bash -kubectl logs node-name-test -``` - -If you see an ip, great! Go ahead and deploy the hello world app using: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-grpc.yml -``` - -If instead you see a “server can’t find …” error, deploy the hello-world legacy version that relies on hostIP instead of nodeName: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-grpc-legacy.yml -``` - -Also deploy Linkerd: - -```bash -kubectl apply -f https://raw.githubusercontent.com/BuoyantIO/linkerd-examples/master/k8s-daemonset/k8s/linkerd-grpc.yml -``` - -Once Kubernetes provisions an external LoadBalancer IP for Linkerd, we can do some test requests! Note that the examples in these blog posts assume k8s is running on GKE (e.g. external loadbalancer IPs are available, no CNI plugins are being used). Slight modifications may be needed for other environments, for example Minikube or CNI configurations with Calico/Weave. - -We’ll use the helloworld-client provided by the `hello world` [docker image](https://hub.docker.com/r/buoyantio/helloworld/)in order to send test gRPC requests to our `hello world` service: - - -```bash -$ L5D_INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -$ docker run --rm --entrypoint=helloworld-client buoyantio/helloworld:0.1.3 $L5D_INGRESS_LB:4140 -Hello (10.196.1.242) world (10.196.1.243)!! -``` - - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -$ L5D_INGRESS_LB=$( \ - kubectl get po -l app=l5d \ - -o jsonpath="{.items[0].status.hostIP}") -$ docker run --rm --entrypoint=helloworld-client buoyantio/helloworld:0.1.3 \ - $L5D_INGRESS_LB:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[0].nodePort}') -Hello (10.196.1.242) world (10.196.1.243)!! -``` - -It works! We can check out the Linkerd admin dashboard by doing: - -```bash -open http://$L5D_INGRESS_LB:9990 # on OSX -``` - -Or using hostIP: - -```bash -open http://$L5D_INGRESS_LB:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[2].nodePort}') # on OSX -``` - -And that’s it! We now have gRPC services talking to each other, with their HTTP/2 requests being routed through Linkerd. Now we can use all of [Linkerd’s awesome features](https://linkerd.io/features/), including per-request routing, load balancing, circuit-breaking, retries, TLS, distributed tracing, service discovery integration and more, in our gRPC microservice applications! - -## HOW DID WE CONFIGURE LINKERD FOR GRPC OVER HTTP/2? - -Let’s take a step back and examine our config. What’s different about using gRPC rather than HTTP/1.1? Actually, not very much! 
If you compare our [Linkerd config for routing gRPC](https://raw.githubusercontent.com/BuoyantIO/linkerd-examples/master/k8s-daemonset/k8s/linkerd-grpc.yml) with the [config for plain old HTTP/1.1](https://raw.githubusercontent.com/BuoyantIO/linkerd-examples/master/k8s-daemonset/k8s/linkerd.yml), they’re quite similar (full documentation on configuring an HTTP/2 router can be found [here](https://linkerd.io/config/0.9.1/linkerd/index.html#http-2-protocol)). - -The changes you’ll notice are: - -### PROTOCOL - -We’ve changed the router `protocol` from `http` to `h2` (naturally!) and set the`experimental` flag to `true` to opt in to experimental HTTP/2 support. - -```yaml -routers: -- protocol: h2 - experimental: true -``` - -### IDENTIFIER - -We use the [header path identifier](https://linkerd.io/config/1.0.0/linkerd/index.html#http-2-header-path-identifier) to assign a logical name based on the gRPC request. gRPC clients set HTTP/2’s `:path` pseudo-header to `/package.Service/Method`. The header path identifier uses this pseudo-header to assign a logical name to the request (such as `/svc/helloworld.Hello/Greeting`). Setting `segments` to 1 means we only take the first segment of the path, in other words, dropping the gRPC `Method`. The resulting name can then be transformed via a [dtab](https://linkerd.io/in-depth/dtabs/) where we extract the gRPC service name, and route the request to a Kubernetes service of the same name. For more on how Linkerd routes requests, see our [routing](https://linkerd.io/in-depth/routing/) docs. - -```yaml -identifier: - kind: io.l5d.header.path - segments: 1 -``` - -### DTAB - -We’ve adjusted the dtab slightly, now that we’re routing on the `/serviceName` prefix from the header path identifier. The dtab below transforms the logical name assigned by the path identifier (`/svc/helloworld.Hello`) to a name that tells the [io.l5d.k8s namer](https://linkerd.io/config/1.0.0/linkerd/index.html#kubernetes-service-discovery) to query the API for the `grpc` port of the `hello` Service in the default namespace (`/#/io.l5d.k8s/default/grpc/Hello`). - -The [domainToPathPfx namer](https://linkerd.io/config/1.0.0/linkerd/index.html#domaintopathpfx) is used to extract the service name from the package-qualified gRPC service name, as seen in the dentry `/svc => /$/io.buoyant.http.domainToPathPfx/grpc`. - -Delegation to `world` is similar, however we’ve decided to version the `world` service, so we’ve added the additional rule `/grpc/World => /srv/world-v1` to send requests to world-v1. Our full dtab is now: - -```txt -/srv => /#/io.l5d.k8s/default/grpc; -/grpc => /srv; -/grpc/World => /srv/world-v1; -/svc => /$/io.buoyant.http.domainToPathPfx/grpc; -``` - -## CONCLUSION - -In this article, we’ve seen how to use Linkerd as a service mesh for gRPC requests, adding latency-aware load balancing, circuit breaking, and request-level routing to gRPC apps. Linkerd and gRPC are a great combination, especially as gRPC’s HTTP/2 underpinnings provide it with powerful mechanisms like multiplexed streaming, back pressure, and cancellation, which Linkerd can take full advantage of. Because gRPC includes routing information in the request, it’s a natural fit for Linkerd, and makes it very easy to set up Linkerd to route gRPC requests. For more on Linkerd’s roadmap around gRPC, see [Oliver’s blog post on the topic]({{< ref "http2-grpc-and-linkerd" >}}). 
- -Finally, for a more advanced example of configuring gRPC services, take a look at our [Gob microservice app](https://github.com/BuoyantIO/linkerd-examples/tree/master/gob). In that example, we additionally deploy [Namerd](https://github.com/linkerd/linkerd/tree/master/namerd), which we use to manage our routing rules centrally, and update routing rules without redeploying Linkerd. This lets us to do things like canarying and blue green deploys between different versions of a service. - -For more information on Linkerd, gRPC, and HTTP/2 head to the [Linkerd gRPC documentation](https://linkerd.io/features/grpc/) as well as our [config documentation for HTTP/2](https://linkerd.io/config/1.0.0/linkerd/index.html#http-2-protocol). - -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing.md deleted file mode 100644 index bf42940114..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing.md +++ /dev/null @@ -1,245 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing' -title: 'A Service Mesh for Kubernetes, Part V: Dogfood environments, ingress and edge routing' -aliases: - - /2016/11/18/a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing/ -author: 'risha' -date: Fri, 18 Nov 2016 00:10:16 +0000 -thumbnail: /uploads/kubernetes5_featured_Twitter_ratio.png -draft: false -featured: false -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -In this post we’ll show you how to use a service mesh of linkerd instances to handle ingress traffic on Kubernetes, distributing traffic across every instance in the mesh. We’ll also walk through an example that showcases linkerd’s advanced routing capabilities by creating a *dogfood* environment that routes certain requests to a newer version of the underlying application, e.g. for internal, pre-release testing. - -_Update 2017-04-19_: this post is about using linkerd as an ingress point for traffic to a Kubernetes network. As of [0.9.1](https://github.com/linkerd/linkerd/releases/tag/0.9.1), linkerd supports the Kubernetes Ingress resource directly, which is an alternate, and potentially simpler starting point for some of the use cases in this article. 
For information on how to use linkerd as a [Kubernetes ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress/#ingress-controllers), please see Sarah’s blog post, [Linkerd as an ingress controller][part-viii]. - -This is one article in a series of articles about [linkerd](https://linkerd.io/), [Kubernetes](http://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) (this article) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -In previous installments of this series, we’ve shown you how you can use linkerd to capture [top-line service metrics][part-i], transparently [add TLS][part-iii] across service calls, and [perform blue-green deploys][part-iv]. These posts showed how using linkerd as a service mesh in environments like Kubernetes adds a layer of resilience and performance to internal, service-to-service calls. In this post, we’ll extend this model to ingress routing. - -Although the examples in this post are Kubernetes-specific, we won’t use the built-in [Ingress Resource](https://kubernetes.io/docs/concepts/services-networking/ingress/) that Kubernetes provides (for this, see [Sarah’s post][part-viii]). While Ingress Resources are a convenient way of doing basic path and host-based routing, at the time of writing, they are fairly limited. In the examples below, we’ll be reaching far beyond what they provide. - -## STEP 1: DEPLOY THE LINKERD SERVICE MESH - -Starting with our basic linkerd service mesh Kubernetes config from the previous articles, we’ll make two changes to support ingress: we’ll modify the linkerd config to add an additional logical router, and we’ll tweak the VIP in the Kubernetes Service object around linkerd. (The full config is here: [linkerd-ingress.yml](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-ingress.yml).) 
- -Here’s the new `ingress` logical router on linkerd instances that will handle ingress traffic and route it to the corresponding services: - -```yml -routers: - - protocol: http - label: ingress - dtab: | - /srv => /#/io.l5d.k8s/default/http ; - /domain/world/hello/www => /srv/hello ; - /domain/world/hello/api => /srv/api ; - /host => /$/io.buoyant.http.domainToPathPfx/domain ; - /svc => /host ; - interpreter: - kind: default - transformers: - - kind: io.l5d.k8s.daemonset - namespace: default - port: incoming - service: l5d - servers: - - port: 4142 - ip: 0.0.0.0 -``` - -In this config, we’re using linkerd’s routing syntax, [dtabs](https://linkerd.io/in-depth/dtabs/), to route requests from domain to service—in this case from “api.hello.world” to the `api` service, and from “www.hello.world” to the `world` service. For simplicity’s sake, we’ve added one rule per domain, but this mapping can easily be generified for more complex setups. (If you’re a linkerd config aficionado, we’re accomplishing this behavior by combining linkerd’s default [header token identifier](https://linkerd.io/config/1.0.0/linkerd/index.html#header-identifier) to route on the Host header, the [`domainToPathPfx` namer](https://linkerd.io/config/1.0.0/linkerd/index.html#domaintopathpfx) to turn dotted hostnames into hierarchical paths, and the [`io.l5d.k8s.daemonset` transformer](https://linkerd.io/config/1.0.0/linkerd/index.html#daemonset-kubernetes) to send requests to the corresponding host-local linkerd.) - -We’ve added this ingress router to every linkerd instance—in true service mesh fashion, we’ll fully distribute ingress traffic across these instances so that no instance is a single point of failure. - -We also need modify our k8s Service object to replace the `outgoing` VIP with an`ingress` VIP on port 80. This will allow us to send ingress traffic directly to the linkerd service mesh—mainly for debugging purposes, since the this traffic will not be sanitized before hitting linkerd. (In the next step, we’ll fix this.) The Kubernetes change looks like this: - -```bash ---- -apiVersion: v1 -kind: Service -metadata: - name: l5d -spec: - selector: - app: l5d - type: LoadBalancer - ports: - - name: ingress - port: 80 - targetPort: 4142 - - name: incoming - port: 4141 - - name: admin - port: 9990 -``` - -All of the above can be accomplished in one fell swoop by running this command to apply the [full linkerd service mesh plus ingress Kubernetes config](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-ingress.yml): - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-ingress.yml -``` - -## STEP 2: DEPLOY THE SERVICES - -For services in this example, we’ll use the same [hello and world configs](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml) from the previous blog posts, and we’ll add two new services: an [api service](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/api.yml), which calls both `hello` and `world`, and a new version of the world service, `world-v2`, which will return the word “earth” rather than “world”—our growth hacker team has assured us their A/B tests show this change will increase engagement tenfold. - -The following commands will deploy the three [hello world services](https://github.com/linkerd/linkerd-examples/tree/master/docker/helloworld) to the default namespace. 
These apps rely on the nodeName supplied by the [Kubernetes downward API](https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/) to find Linkerd. To check if your cluster supports nodeName, you can run this test job: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/node-name-test.yml -``` - -And then looks at its logs: - -```bash -kubectl logs node-name-test -``` - -If you see an ip, great! Go ahead and deploy the hello world app using: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/api.yml -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/world-v2.yml -``` - -If instead you see a “server can’t find …” error, deploy the hello-world legacy version that relies on hostIP instead of nodeName: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-legacy.yml -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/api-legacy.yml -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/world-v2.yml -``` - -At this point we should be able to test the setup by sending traffic through the `ingress` Kubernetes VIP. In the absence of futzing with DNS, we’ll set a Host header manually on the request: - - -```bash -$ INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -$ curl -s -H "Host: www.hello.world" $INGRESS_LB -Hello (10.0.5.7) world (10.0.4.7)!! -$ curl -s -H "Host: api.hello.world" $INGRESS_LB -{"api_result":"api (10.0.3.6) Hello (10.0.5.4) world (10.0.1.5)!!"} -``` - - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -INGRESS_LB=$(kubectl get po -l app=l5d -o jsonpath="{.items[0].status.hostIP}"):$(kubectl get svc l5d -o 'jsonpath={.spec.ports[0].nodePort}') -``` - -Success! We’ve set up linkerd as our ingress controller, and we’ve used it to route requests received on different domains to different services. And as you can see, production traffic is hitting the `world-v1` service—we aren’t ready to bring `world-v2`out just yet. - -## STEP 3: A LAYER OF NGINX - -At this point we have functioning ingress. However, we’re not ready for production just yet. For one thing, our ingress router doesn’t strip headers from requests, which means that external requests may include headers that we do not want to accept. For instance, linkerd allows setting the `l5d-dtab` header to [apply routing rules per-request](https://linkerd.io/features/routing/#per-request-routing). This is a useful feature for ad-hoc staging of new services, but it’s probably not appropriate calls from the outside world! - -For example, we can use the `l5d-dtab` header to override the routing logic to use `world-v2` rather than the production `world-v1` service the outside world: - -```bash -$ curl -H "Host: www.hello.world" -H "l5d-dtab: /host/world => /srv/world-v2;" $INGRESS_LB -Hello (10.100.4.3) earth (10.100.5.5)!! -``` - -Note the **earth** in the response, denoting the result of the `world-v2` service. That’s cool, but definitely not the kind of power we want to give just anyone! 
- -We can address this (and other issues, such as serving static files) by adding [nginx](https://nginx.com/) to the mix. If we configure nginx to strip incoming headers before proxying requests to the linkerd ingress route, we’ll get the best of both worlds: an ingress layer that is capable of safely handling external traffic, and linkerd doing dynamic, service-based routing. - -Let’s add nginx to the cluster. We’ll configure it using [this nginx.conf](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/nginx.yml). We’ll use the `proxy_pass` directive under our virtual servers `www.hello.world` and `api.hello.world` to send requests to the linkerd instances, and, for maximum fanciness, we’ll strip [linkerd’s context headers](https://linkerd.io/config/0.8.3/linkerd/index.html#context-headers) using the `more_clear_input_headers` directive (with wildcard matching) provided by the [Headers More](https://github.com/openresty/headers-more-nginx-module) module. - -(Alternatively, we could avoid third-party nginx modules by using nginx’s`proxy_set_header` directive to clear headers. We’d need separate entries for each `l5d-ctx-` header as well as the `l5d-dtab` and `l5d-sample` headers.) - -Note that as of [linkerd 0.9.0]({{< relref "linkerd-0-9-0-released" >}}), we can clear incoming `l5d-*` headers by setting `clearContext: true` on the ingress router [server](https://linkerd.io/config/1.0.0/linkerd/index.html#server-parameters). However, nginx has many features we can make use of (as you’ll see presently), so it is still valuable to use nginx in conjunction with linkerd. - -For those of you following along at home, we’ve published an nginx Docker image with the *Headers More* module installed ([Dockerfile here](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/docker/nginx/Dockerfile)) as [buoyantio/nginx:1.11.5](https://hub.docker.com/r/buoyantio/nginx/). We can deploy this image with our config above using this [Kubernetes config](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/nginx.yml): - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/nginx.yml -``` - -After waiting a bit for the external IP to appear, we can test that nginx is up by hitting the simple test endpoint in the nginx.conf: - -```bash -INGRESS_LB=$(kubectl get svc nginx -o jsonpath="{.status.loadBalancer.ingress[0].*}") -curl $INGRESS_LB -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -INGRESS_LB=$(kubectl get po -l app=nginx -o jsonpath="{.items[0].status.hostIP}"):$(kubectl get svc nginx -o 'jsonpath={.spec.ports[0].nodePort}') -``` - -We should be able to now send traffic to our services through nginx: - -```bash -$ curl -s -H "Host: www.hello.world" $INGRESS_LB -Hello (10.0.5.7) world (10.0.4.7)!! -$ curl -s -H "Host: api.hello.world" $INGRESS_LB -{"api_result":"api (10.0.3.6) Hello (10.0.5.4) world (10.0.1.5)!!"} -``` - -Finally, let’s try our header trick and attempt to communicate directly with the `world-v2` service: - -```bash -$ curl -H "Host: www.hello.world" -H "l5d-dtab: /host/world => /srv/world-v2;" $INGRESS_LB -Hello (10.196.1.8) world (10.196.2.13)!! -``` - -Great! No more **earth**. Nginx is sanitizing external traffic. - -## STEP 4: TIME FOR SOME DELICIOUS DOGFOOD! - -Ok, we’re ready for the good part: let’s set up a dogfood environment that uses the`world-v2` service, but only for some traffic! 
- -For simplicity, we’ll target traffic that sets a particular cookie,`special_employee_cookie`. In practice, you probably want something more sophisticated than this—authenticate it, require that it come from the corp network IP range, etc. - -With nginx and linkerd installed, accomplishing this is quite simple. We’ll use nginx to check for the presence of that cookie, and set a dtab override header for linkerd to adjust its routing. The relevant nginx config looks like this: - -```txt -if ($cookie_special_employee_cookie ~* "dogfood") { - set $xheader "/host/world => /srv/world-v2;"; -} - -proxy_set_header 'l5d-dtab' $xheader; -``` - -If you’ve been following the steps above, the deployed nginx already contains this configuration. We can test it like so: - -```bash -$ curl -H "Host: www.hello.world" --cookie "special_employee_cookie=dogfood" $INGRESS_LB -Hello (10.196.1.8) earth (10.196.2.13)!! -``` - -The system works! When this cookie is set, you’ll be in dogfood mode. Without it, you’ll be in regular, production traffic mode. Most importantly, dogfood mode can involve new versions of services that appear *anywhere* in the service stack, even many layers deep—as long as service code [forwards linkerd context headers](https://linkerd.io/config/1.0.0/linkerd/index.html#context-headers), the linkerd service mesh will take care of the rest. - -## Conclusion - -In this post, we saw how to use linkerd to provide powerful and flexible ingress to a Kubernetes cluster. We’ve demonstrated how to deploy a nominally production-ready setup that uses linkerd for service routing. And we’ve demonstrated how to use some of the advanced routing features of linkerd to decouple the *traffic-serving* topology from the *deployment topology*, allowing for the creation of dogfood environments without separate clusters or deploy-time complications. - -For more about running linkerd in Kubernetes, or if you have any issues configuring ingress in your setup, feel free to stop by our [linkerd community Slack](http://slack.linkerd.io/), ask a question on the [Linkerd Support Forum](https://linkerd.buoyant.io/), or [contact us directly](https://linkerd.io/overview/help/)! 
- -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears.md deleted file mode 100644 index 64426bcbc9..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears' -title: 'A Service Mesh for Kubernetes, Part VI: Staging microservices without the tears' -aliases: - - /2017/01/06/a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears/ -author: 'risha' -date: Sat, 07 Jan 2017 00:13:02 +0000 -thumbnail: /uploads/kubernetes6_featured_Twitter_ratio.png -draft: false -featured: false -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -Staging new code before exposing it to production traffic is a critical part of building reliable, low-downtime software. Unfortunately, with microservices, the addition of each new service increases the complexity of the staging process, as the dependency graph between services grows quadratically with the number of services. In this article, we’ll show you how one of linkerd’s most powerful features, *per-request routing*, allows you to neatly sidestep this problem. - -This is one article in a series of articles about [linkerd](https://linkerd.io/), [Kubernetes](http://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) (this article) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. 
[Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -For a video presentation of the concepts discussed in this article, see [Alex Leong](https://twitter.com/adlleong)’s meetup talk, [Microservice Staging without the Tears](https://youtu.be/y0D5EAXvUpg). - -Linkerd is a service mesh for cloud-native applications. It acts as a transparent request proxy that adds a layer of resilience to applications by wrapping cross-service calls with features like latency-aware load balancing, retry budgets, deadlines, and circuit breaking. - -In addition to improving application resilience, linkerd also provides a powerful routing language that can alter how request traffic flows between services at runtime. In this post, we’ll demonstrate linkerd’s ability to do this routing, not just globally, but on a per-request basis. We’ll show how this *per-request routing* can be used to create ad-hoc staging environments that allow us to test new code in the context of the production application, without actually exposing the new code to production traffic. Finally, we’ll show how (in contrast to staging with a dedicated staging environment) ad-hoc staging requires neither coordination with other teams, nor the costly process of keeping multiple deployment environments in sync. - -## Why stage? - -Why is staging so important? In modern software development, code goes through a rigorous set of practices designed to assess *correctness*: code review, unit tests, integration tests, etc. Having passed these hurdles, we move to assessing *behaviour*: how fast is the new code? How does it behave under load? How does it interact with runtime dependencies, including other services? - -These are the questions that a staging environment can answer. The fundamental principle of staging is that the closer to the production environment, the more realistic staging will be. Thus, while mocks and stub implementations make sense for tests, for staging, we ideally want actual running services. The best staging environment is one in which the surrounding environment is exactly the same as it will be in production. - -## Why is staging hard for microservices? - -When your application consists of many services, the interaction between these services becomes a critical component of end-to-end application behaviour. In fact, the more that the application is disaggregated into services, the more that the runtime behaviour of the application is determined not just by the services themselves, but by the interactions between them. - -Unfortunately, increasing the number of services doesn’t just increase the importance of proper staging, it also increases the difficulty of doing this properly. Let’s take a look at a couple common ways of staging, and why they suffer in multi-service environments. - -A frequent choice for staging is the shared staging cluster, wherein your staged service is deployed into a dedicated staging environment alongside other staged services. The problem with this approach is that there is no isolation. 
If, as in the diagram below, Alex deploys his Foo service and sees weird behaviour, it’s difficult to determine the source—it could be due to the staging deploys of Alex, Alice, or Bob, or simply the mock data in the database. Keeping the staging environment in sync with production can be very difficult, especially as the number of services, teams, and releases all start to increase. - -{{< fig - alt="Diagram" - title="Diagram" - src="/uploads/2017/07/buoyant-1_everyone.png" >}} - -An alternative to the shared environment that addresses the lack of isolation is the “personal” or per-developer, staging cluster. In this model, every developer can spin up a staging cluster on demand. To keep our staging effective, staging a service requires staging its upstream and downstream dependencies as well. (For example, in the diagram below, Alex would need to deploy Web FE and API in order to ensure the changes he made to his Foo service are correctly reflected there.) Unfortunately, maintaining the ability to deploy arbitrary subsets of the application topology on demand also becomes very complex, especially as the application topology becomes larger, and as services have independent deployment models. - -{{< fig - alt="Diagram 2" - title="Diagram 2" - src="/uploads/2017/07/buoyant-2_personal.png" >}} - -Finally, there is the (sadly prevalent!) option of simply deploying fresh code into production and rolling it back when flaws are discovered. Of course, this is rather risky, and may not be an option for applications that handle, e.g., financial transactions. There are many other ways you could obtain a staging environment, but in this article, we’ll describe a straightforward, tear-free approach. - -## A better path - -Fortunately, with linkerd, we can do staging without incurring the costs detailed above, by creating *ad-hoc staging environments*. In fact, one of the prime motivations for the routing layer in Finagle, the library underlying linkerd, was solving this very problem at Twitter! Let’s consider again the goal of staging Alex’s Foo service. What if, rather than deploying to a separate environment, we could simply substitute Foo-staging in place of Foo-production, for a specific request? That would give us the ability to stage Foo safely, against the production environment, without requiring any deployment other than that of Foo-staging itself. This is the essence of ad-hoc staging environments. The burden on the developer is now greatly eased: Alex must simply stage his new code, set a header on ingress requests, and voila! - -{{< fig - alt="Diagram 3" - title="Diagram 3" - src="/uploads/2017/07/buoyant-3_request_path.png" >}} - -Happily, linkerd’s per-request routing allow us to do just this. With linkerd proxying traffic, we can set a routing “override” for a particular request using the `l5d-dtab`header. This header allows you to set routing rules (called, in Finagle parlance, “[Dtabs](https://linkerd.io/in-depth/dtabs/)”) for that request. For example, the dtab rule `/s/foo => /srv/alex-foo` might override the production routing rule for Foo. Attaching this change to a *single request* would allow us to reach Alex’s Foo service, but only for that request. Linkerd propagates this rule, so any usage of Alex’s Foo service anywhere in the application topology, for the lifetime of that request, will be properly handled. 
- -{{< fig - alt="Diagram 4" - title="Diagram 4" - src="/uploads/2017/07/buoyant-4_override.png" >}} - -## Trying this at home - -Keen readers of our [Service Mesh for Kubernetes][part-i] series will note that we’ve already seen an example of this in [our dogfood blog post][part-v]. We deployed a `world-v2` service, and we were able to send individual dogfood requests through this service via a simple header containing a routing override. Now, we can use this same mechanism for something else: setting up an ad hoc staging environment. - -Let’s deploy two versions of a service and use linkerd’s routing capabilities to test our new service before using it in production. We’ll deploy our `hello` and `world-v1`services as our running prod services, and then we’ll create an ad-hoc staging environment to stage and test a new version of world, `world-v2`. - -## STEP 1: DEPLOY LINKERD AND OUR HELLO-WORLD SERVICES - -We’ll use the hello world service from the previous blog posts. This consists of a hello service that calls a world service. These apps rely on the nodeName supplied by the [Kubernetes downward API](https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/) to find Linkerd. To check if your cluster supports nodeName, you can run this test job: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/node-name-test.yml -``` - -And then looks at its logs: - -```bash -kubectl logs node-name-test -``` - -If you see an ip, great! Go ahead and deploy the hello world app using: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml -``` - -If instead you see a “server can’t find …” error, deploy the hello-world legacy version that relies on hostIP instead of nodeName: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-legacy.yml -``` - -Let’s deploy our prod environment (linkerd, and the hello and world services): - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-ingress.yml -``` - -Let’s also deploy linkerd and the service we want to stage, world-v2, which will return the word “earth” rather than “world”. - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-ingress.yml -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/world-v2.yml -``` - -## STEP 2: USE PER REQUEST OVERRIDES IN OUR AD-HOC STAGING ENVIRONMENT - -So now that we have a running world-v2, let’s test it by running a request through our production topology, except that instead of hitting `world-v1`, we’ll hit `world-v2`. First, let’s run an unmodified request through our default topology (you may have to wait for l5d’s external IP to appear): - - -```bash -$ INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -$ curl -H "Host: www.hello.world" $INGRESS_LB -Hello (10.196.2.232) world (10.196.2.233)!! -``` - - -Or if external load balancer support is unavailable for the cluster, use hostIP: - - -```bash -$ INGRESS_LB=$(kubectl get po -l app=l5d -o jsonpath="{.items[0].status.hostIP}"):$(kubectl get svc l5d -o 'jsonpath={.spec.ports[0].nodePort}') -$ curl -H "Host: www.hello.world" $INGRESS_LB -Hello (10.196.2.232) world (10.196.2.233)!! 
-``` - - -As we expect, this returns `Hello (......) World (.....)` from our production topology. Now, how do we get to the staging environment? All we have to do is pass the following dtab override and requests through the prod topology will go to `world-v2`! A dtab override is another dtab entry that we pass using headers in the request. Since later dtab rules are applied first, this rule will replace (override) our current “/host/world => /srv/world-v1” rule with a rule to send requests with `/host/world` to `/srv/world-v2` instead. - -```bash -$ curl -H "Host: www.hello.world" -H "l5d-dtab: /host/world => /srv/world-v2;" $INGRESS_LB -Hello (10.196.2.232) earth (10.196.2.234)!! -``` - -We now see “earth” instead of “world”! The request is successfully served from the world-v2 service wired up to our existing production topology, with no code changes or additional deploys. Success! Staging is now fun and easy. - -[Dtabs](https://linkerd.io/in-depth/dtabs/) and [routing](https://linkerd.io/in-depth/routing/) in linkerd are well documented. During development, you can also make use of linkerd’s “dtab playground” at `http://$INGRESS_LB:9990/delegator`. By going to the “outgoing” router and testing a request name like `/http/1.1/GET/world`, you can see linkerd’s routing policy in action. - -## In practice - -In practice, there are some caveats to using this approach. First, the issue of writes to production databases must be addressed. The same dtab override mechanism can be used to send any writes to a staging database, or, with some application-level intelligence, to /dev/null. It is recommended that these rules are not created by hand so as to avoid expensive mistakes with production data! - -Secondly, you application needs to forward [linkerd’s context headers](https://linkerd.io/features/routing/#per-request-routing) for this to work. - -Lastly, it’s important to ensure that the `l5d-dtab` header is not settable from the outside world! In our post about [setting up a dogfood environment in Kubernetes][part-v], we gave an example nginx configuration for ingress that would strip unknown headers from the outside world—good practice for a variety of reasons. - -## Conclusion - -We’ve demonstrated how to create ad-hoc staging environments with linkerd by setting per-request routing rules. With this approach, we can stage services in the context of production environment, without modifying existing code, provisioning extra resources for our staging environment (other than for the staging instance itself), or maintaining parallel environments for production and staging. For microservices with complex application topologies, this approach can provide an easy, low-cost way to staging services before pushing to production. - -For more about running linkerd in Kubernetes, or if you have any issues configuring ingress in your setup, feel free to stop by our [linkerd community Slack](https://slack.linkerd.io/), ask a question on the [Linkerd Support Forum](https://linkerd.buoyant.io/), or [contact us directly](https://linkerd.io/overview/help/)! 
- -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy.md deleted file mode 100644 index 10f5c39d58..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy.md +++ /dev/null @@ -1,203 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy' -title: 'A Service Mesh for Kubernetes, Part VII: Distributed tracing made easy' -aliases: - - /2017/03/14/a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy/ -author: 'kevin' -date: Tue, 14 Mar 2017 23:26:32 +0000 -thumbnail: /uploads/kubernetes7_featured_Twitter_ratio.png -draft: false -featured: false -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -Linkerd’s role as a *service mesh* makes it a great source of data around system performance and runtime behavior. This is especially true in polyglot or heterogeneous environments, where instrumenting each language or framework can be quite difficult. Rather than instrumenting each of your apps directly, the service mesh can provide a uniform, standard layer of application tracing and metrics data, which can be collected by systems like [Zipkin](http://zipkin.io/) and [Prometheus](https://prometheus.io/). - -In this post we’ll walk through a simple example how Linkerd and Zipkin can work together in Kubernetes to automatically get distributed traces, with only minor changes to the application. This is one article in a series of articles about [Linkerd](https://linkerd.io/), [Kubernetes](http://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. 
[Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) (this article) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -In previous installments of this series, we’ve shown you how you can use Linkerd to [capture top-line service metrics][part-i]. Service metrics are vital for determining the health of individual services, but they don’t capture the way that multiple services work (or don’t work!) together to serve requests. To see a bigger picture of system-level performance, we need to turn to distributed tracing. - -In a previous post, we covered some of the [benefits of distributed tracing][polyglot], and how to configure Linkerd to export tracing data to [Zipkin](http://zipkin.io/). In this post, we’ll show you how to run this setup entirely in Kubernetes, including Zipkin itself, and how to derive meaningful data from traces that are exported by Linkerd. - -## A Kubernetes Service Mesh - -Before we start looking at traces, we’ll need to deploy Linkerd and Zipkin to Kubernetes, along with some sample apps. The [linkerd-examples](https://github.com/linkerd/linkerd-examples/tree/master/k8s-daemonset) repo provides all of the configuration files that we’ll need to get tracing working end-to-end in Kubernetes. We’ll walk you through the steps below. - -## STEP 1: INSTALL ZIPKIN - -We’ll start by installing Zipkin, which will be used to collect and display tracing data. In this example, for convenience, we’ll use Zipkin’s in-memory store. (If you plan to run Zipkin in production, you’ll want to switch to using one of its persistent backends.) To install Zipkin in the default Kubernetes namespace, run: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/zipkin.yml -``` - -You can confirm that installation was successful by viewing Zipkin’s web UI: - -```bash -ZIPKIN_LB=$(kubectl get svc zipkin -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$ZIPKIN_LB # on OS X -``` - -Note that it may take a few minutes for the ingress IP to become available. Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -ZIPKIN_LB=$(kubectl get po -l app=zipkin -o jsonpath="{.items[0].status.hostIP}"):$(kubectl get svc zipkin -o 'jsonpath={.spec.ports[0].nodePort}') -open http://$ZIPKIN_LB # on OS X -``` - -However, the web UI won’t show any traces until we install Linkerd. - -## STEP 2: INSTALL THE SERVICE MESH - -Next we’ll install the Linkerd service mesh, configured to write tracing data to Zipkin. 
To install Linkerd as a DaemonSet (i.e., one instance per host) in the default Kubernetes namespace, run: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-zipkin.yml -``` - -This installed Linkerd as a service mesh, exporting tracing data with Linkerd’s [Zipkin telemeter](https://linkerd.io/config/0.9.0/linkerd/index.html#zipkin-telemeter). The relevant config snippet is: - -```bash -telemetry: -- kind: io.l5d.zipkin - host: zipkin-collector.default.svc.cluster.local - port: 9410 - sampleRate: 1.0 -``` - -Here we’re telling Linkerd to send tracing data to the Zipkin service that we deployed in the previous step, on port 9410. The configuration also specifies a sample rate, which determines the number of requests that are traced. In this example we’re tracing all requests, but in a production setting you may want to set the rate to be much lower (the default is 0.001, or 0.1% of all requests). - -You can confirm the installation was successful by viewing Linkerd’s admin UI (note, again, that it may take a few minutes for the ingress IP to become available, depending on the vagaries of your cloud provider): - -```bash -L5D_INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$L5D_INGRESS_LB:9990 # on OS X -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -L5D_INGRESS_LB=$(kubectl get po -l app=l5d -o jsonpath="{.items[0].status.hostIP}") -open http://$L5D_INGRESS_LB:$(kubectl get svc l5d -o 'jsonpath={.spec.ports[2].nodePort}') # on OS X -``` - -## STEP 3: INSTALL THE SAMPLE APPS - -Now we’ll install the “hello” and “world” apps in the default namespace. These apps rely on the nodeName supplied by the [Kubernetes downward API](https://kubernetes.io/docs/tasks/inject-data-application/downward-api-volume-expose-pod-information/) to find Linkerd. To check if your cluster supports nodeName, you can run this test job: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/node-name-test.yml -``` - -And then looks at its logs: - -```bash -kubectl logs node-name-test -``` - -If you see an ip, great! Go ahead and deploy the hello world app using: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml -``` - -If instead you see a “server can’t find …” error, deploy the hello-world legacy version that relies on hostIP instead of nodeName: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-legacy.yml -``` - -Congrats! At this point, we have a functioning service mesh with distributed tracing enabled, and an application that makes use of it. Let’s see the entire setup in action by sending traffic through Linkerd’s outgoing router running on port 4140: - -```bash -http_proxy=http://$L5D_INGRESS_LB:4140 curl -s http://hello -Hello () world ()! -``` - -Or if using hostIP: - -```bash -http_proxy=http://$L5D_INGRESS_LB: -``` - -If everything is working, you’ll see a “Hello world” message similar to that above, with the IPs of the pods that served the request. - -## STEP 4: ENJOY THE VIEW - -Now it’s time to see some traces. Let’s start by looking at the trace that was emitted by the test request that we sent in the previous section. 
Zipkin’s UI allows you to search by “span” name, and in our case, we’re interested in spans that originated with the Linkerd router running on 0.0.0.0:4140, which is where we sent our initial request. We can search for that span as follows: - -```bash -open http://$ZIPKIN_LB/?serviceName=0.0.0.0%2F4140 # on OS X -``` - -That should surface 1 trace with 8 spans, and the search results should look like this: - -{{< fig - alt="trace" - title="trace" - src="/uploads/2017/07/buoyant-k8s-tracing-search-1-large-1024x352.png" >}} - -Clicking on the trace from this view will bring up the trace detail view: - -{{< fig - alt="detail view" - title="detail view" - src="/uploads/2017/07/buoyant-k8s-tracing-trace-1-large-1024x360.png" >}} - -From this view, you can see the timing information for all 8 spans that Linkerd emitted for this trace. The fact that there are 8 spans for a request between 2 services stems from the service mesh configuration, in which each request passes through two Linkerd instances (so that the protocol can be upgraded or downgraded, or [TLS can be added and removed across node boundaries][part-iii]). Each Linkerd router emits both a server span and a client span, for a total of 8 spans. - -Clicking on a span will bring up additional details for that span. For instance, the last span in the trace above represents how long it took the world service to respond to a request—8 milliseconds. If you click on that span, you’ll see the span detail view: - -{{< fig - alt="span detail" - title="span detail" - src="/uploads/2017/07/buoyant-k8s-tracing-span-1-large-1024x712.png" >}} - -This view has a lot more information about the span. At the top of the page, you’ll see timing information that indicates when Linkerd sent the request to the service, and when it received a response. You’ll also see a number of key-value pairs with additional information about the request, such as the request URI, the response status code, and the address of the server that served the request. All of this information is populated by Linkerd automatically, and can be very useful in tracking down performance bottlenecks and failures. - -## A NOTE ABOUT REQUEST CONTEXT - -In order for distributed traces to be properly disentangled, we need a little help from the application. Specifically, we need services to forward Linkerd’s “context headers” (anything that starts with `l5d-ctx-`) from incoming requests to outgoing requests. Without these headers, it’s impossible to align outgoing requests with incoming requests through a service. (The hello and world services provided above do this by default.) - -There are some additional benefits to forwarding context headers, beyond tracing. From our [previous blog post](/2016/05/17/distributed-tracing-for-polyglot-microservices/#request-context) on the topic: - -> Forwarding request context for Linkerd comes with far more benefits than just tracing, too. For instance, adding the `l5d-dtab` header to an inbound request will add a dtab override to the request context. Provided you propagate request context, dtab overrides can be used to apply [per-request routing overrides](https://linkerd.io/features/routing/#per-request-routing) at any point in your stack, which is especially useful for staging ad-hoc services within the context of a production application. In the future, request context will be used to propagate overall *latency budgets*, which will make handling requests within distributed systems much more performant. 
Finally, the `L5d-sample` header can be used to adjust the tracing sample rate on a per-request basis. To guarantee that a request will be traced, set `L5d-sample: 1.0`. If you’re sending a barrage of requests in a loadtest that you don’t want flooding your tracing system, consider setting it to something much lower than the steady-state sample rate defined in your Linkerd config. - -## Conclusion - -We’ve demonstrated how to run Zipkin in Kubernetes, and how to configure your Linkerd service mesh to automatically export tracing data to Zipkin. Distributed tracing is a powerful tool that is readily available to you if you’re already using Linkerd. Check out Linkerd’s [Zipkin telemeter](https://linkerd.io/config/1.0.0/linkerd/index.html#zipkin-telemeter) configuration reference, and find us in the [Linkerd Slack](https://slack.linkerd.io/) if you run into any issues setting it up. - -## APPENDIX: UNDERSTANDING TRACES - -In distributed tracing, a trace is a collection of spans that form a tree structure. Each span has a start timestamp and an end timestamp, as well as additional metadata about what occurred in that interval. The first span in a trace is called the root span. All other spans have a parent ID reference that refers to the root span or one of its descendants. There are two types of spans: server and client. In Linkerd’s context, server spans are created when a Linkerd router receives a request from an upstream client. Client spans are created when Linkerd sends that request to a downstream server. Thus the parent of a client span is always a server span. In the process of routing a multi-service request, Linkerd will emit multiple client and server spans, which are displayed as a single trace in the Zipkin UI. - -For instance, consider the following trace: - -{{< fig - alt="diagram" - title="diagram" - src="/uploads/2017/07/buoyant-k8s-tracing-diagram.png" >}} - -In this example, an external request is routed by Linkerd to the “Web” service, which then calls “Service B” and “Service C” sequentially (via Linkerd) before returning a response. The trace has 6 spans, and a total duration of 20 milliseconds. The 3 yellow spans are *server spans*, and the 3 blue spans are *client spans*. The *root span* is Span A, which represents the time from when Linkerd initially received the external request until it returned the response. Span A has one child, Span B, which represents the amount of time that it took for the Web service to respond to Linkerd’s forwarded request. Likewise Span D represents the amount of time that it took for Service B to respond to the request from the Web service. For more information about tracing, read our previous blog post, [Distributed Tracing for Polyglot Microservices][polyglot]. 
- -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} [polyglot]: /2016/05/17/distributed-tracing-for-polyglot-microservices/ diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller.md deleted file mode 100644 index eca80d5554..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller.md +++ /dev/null @@ -1,273 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller' -title: 'A Service Mesh for Kubernetes, Part VIII: Linkerd as an ingress controller' -aliases: - - /2017/04/06/a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller/ -author: 'sarah' -thumbnail: /uploads/kubernetes8_featured_Twitter_ratio.png -date: Thu, 06 Apr 2017 23:34:10 +0000 -draft: false -featured: false -tags: [Linkerd, linkerd, News, tutorials] ---- - -Linkerd is designed to make service-to-service communication internal to an application safe, fast and reliable. However, those same goals are also applicable at the edge. In this post, we’ll demonstrate a new feature of Linkerd which allows it to act as a Kubernetes ingress controller, and show how it can handle ingress traffic both with and without TLS. - -This is one article in a series of articles about Linkerd, Kubernetes, and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) (this article) -9. 
[gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -In a [previous installment][part-v] of this series, we explored how to receive external requests by deploying Linkerd as a Kubernetes DaemonSet and routing traffic through the corresponding Service VIP. In this post, we’ll simplify this setup by using Linkerd as a [Kubernetes ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress/#ingress-controllers), taking advantage of features introduced in [Linkerd 0.9.1](https://github.com/linkerd/linkerd/releases/tag/0.9.1). - -This approach has the benefits of simplicity and a tight integration with the Kubernetes API. However, for more complex requirements like on-demand TLS cert generation, SNI, or routing based on cookie values (e.g. the employee dogfooding approach discussed in [Part V of this series][part-v]), combining Linkerd with a dedicated edge layer such as NGINX is still necessary. - -What is a Kubernetes ingress controller? An ingress controller is an edge router that accepts traffic from the outside world and forwards it to services in your Kubernetes cluster. The ingress controller uses HTTP host and path routing rules defined in Kubernetes’ [ingress resources](https://kubernetes.io/docs/concepts/services-networking/ingress/). - -## INGRESS HELLO WORLD - -Using a [Kubernetes config](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-ingress-controller.yml) from the [linkerd-examples](https://github.com/linkerd/linkerd-examples) repo, we can launch Linkerd as a dedicated ingress controller. The config follows the same pattern as our [previous posts on k8s daemonsets][part-ii]: it deploys an `l5d-config` ConfigMap, an `l5d` DaemonSet, and an `l5d` Service. - -{{< fig - alt="diagram" - title="diagram" - src="/uploads/2017/07/buoyant-k8s-hello-world-ingress-controller-1.png" >}} - -### STEP 1: DEPLOY LINKERD - -First let’s deploy Linkerd. 
You can of course deploy into the default namespace, but here we’ve put Linkerd in its own namespace for better separation of concerns: - -```bash -kubectl create ns l5d-system -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-ingress-controller.yml -n l5d-system -``` - -You can verify that the Linkerd pods are up by running: - -```bash -$ kubectl get po -n l5d-system -NAME READY STATUS RESTARTS AGE -l5d-0w0f4 2/2 Running 0 5s -l5d-3cmfp 2/2 Running 0 5s -l5d-dj1sm 2/2 Running 0 5s -``` - -And take a look at the admin dashboard (This command assumes your cluster supports LoadBalancer services, and remember that it may take a few minutes for the ingress LB to become available.): - -```bash -L5D_SVC_IP=$(kubectl get svc l5d -n l5d-system -o jsonpath="{.status.loadBalancer.ingress[0].*}") -open http://$L5D_SVC_IP:9990 # on OS X -``` - -Or if external load balancer support is unavailable for the cluster, use hostIP: - -```bash -HOST_IP=$(kubectl get po -l app=l5d -n l5d-system -o jsonpath="{.items[0].status.hostIP}") -L5D_SVC_IP=$HOST_IP:$(kubectl get svc l5d -n l5d-system -o 'jsonpath={.spec.ports[0].nodePort}') -open http://$HOST_IP:$(kubectl get svc l5d -n l5d-system -o 'jsonpath={.spec.ports[1].nodePort}') # on OS X -``` - -Let’s take a closer look at the ConfigMap we just deployed. It stores the `config.yaml`file that Linkerd mounts on startup. - -```bash -$ kubectl get cm l5d-config -n l5d-system -o yaml -apiVersion: v1 -data: - config.yaml: |- - namers: - - kind: io.l5d.k8s - - routers: - - protocol: http - identifier: - kind: io.l5d.ingress - servers: - - port: 80 - ip: 0.0.0.0 - clearContext: true - dtab: /svc => /#/io.l5d.k8s - - usage: - orgId: linkerd-examples-ingress -``` - -You can see that this config defines an HTTP router on port 80 that identifies incoming requests using ingress resources (via the [`io.l5d.ingress` identifier](https://linkerd.io/config/1.0.0/linkerd/index.html#ingress-identifier)). The resulting namespace, port, and service name are then passed to the [Kubernetes namer](https://linkerd.io/config/1.0.0/linkerd/index.html#kubernetes-service-discovery) for resolution. We’ve also set `clearContext` to `true` in order to remove any incoming Linkerd context headers from untrusted sources. - -### STEP 2: DEPLOY THE HELLO WORLD APPLICATION - -Now it’s time to deploy our application, so that our ingress controller can route traffic to us. We’ll deploy a simple app consisting of a hello and a world service. 
- -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/world-v2.yml -``` - -You can again verify that the pods are up and running: - -```bash -$ kubectl get po -NAME READY STATUS RESTARTS AGE -hello-0v0vx 1/1 Running 0 5s -hello-84wfp 1/1 Running 0 5s -hello-mrcfr 1/1 Running 0 5s -world-v1-105tl 1/1 Running 0 5s -world-v1-1t6jc 1/1 Running 0 5s -world-v1-htwsw 1/1 Running 0 5s -world-v2-5tl10 1/1 Running 0 5s -world-v2-6jc1t 1/1 Running 0 5s -world-v2-wswht 1/1 Running 0 5s -``` - -At this point, if you try to send an ingress request, you’ll see something like: - -```bash -$ curl $L5D_SVC_IP -Unknown destination: Request("GET /", from /184.23.234.210:58081) / no ingress rule matches -``` - -### STEP 3: CREATE THE INGRESS RESOURCE - -In order for our Linkerd ingress controller to function properly, we need to create an [ingress resource](https://kubernetes.io/docs/concepts/services-networking/ingress/) that uses it. - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/hello-world-ingress.yml -``` - -Verify the resource: - -```bash -$ kubectl get ingress -NAME HOSTS ADDRESS PORTS AGE -hello-world world.v2 80 7s -``` - -This “hello-world” ingress resource references our backends (we’re only using `world-v1` and `world-v2` for this demo): - -```bash -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: hello-world - annotations: - kubernetes.io/ingress.class: "linkerd" -spec: - backend: - serviceName: world-v1 - servicePort: http - rules: - - host: world.v2 - http: - paths: - - backend: - serviceName: world-v2 - servicePort: http -``` - -The resource - -- Specifies `world-v1` as the default backend to route to if a request does not match any of the rules defined. -- Specifies a rule where all requests with the host header `world.v2` will be routed to the `world-v2` service. -- Sets the `kubernetes.io/ingress.class` annotation to “linkerd”. Note, this annotation is only required if there are multiple ingress controllers running in the cluster. - -That’s it! You can exercise these rules by curling the IP assigned to the l5d service loadbalancer. - -```bash -$ curl $L5D_SVC_IP -world (10.0.4.7)! -$ curl -H "Host: world.v2" $L5D_SVC_IP -earth (10.0.1.5)! -``` - -While this example starts with totally new instances, it’s just as easy to add an ingress identifier router to a pre-existing linked setup. Also, although we employ a DaemonSet here (to be consistent with the rest of the Service Mesh for Kubernetes series), utilizing a Kubernetes [Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) for a Linkerd ingress controller works just as well. Using Deployments is left as an exercise for the reader. :) - -## INGRESS WITH TLS - -Linkerd already supports TLS for clients and servers within the cluster. Setting up TLS is described in much more detail in [Part III of this series][part-iii]. In this ingress controller configuration, Linkerd expects certs to be defined in a [Kubernetes secret](https://kubernetes.io/docs/concepts/configuration/secret/) named `ingress-certs` and to follow [the format described as part of the ingress user guide](https://kubernetes.io/docs/concepts/services-networking/ingress/#tls). 
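If you already have a certificate and key on hand, a secret in that format can typically be created directly with `kubectl` (a minimal sketch, assuming local files named `tls.crt` and `tls.key`; the walkthrough below instead applies a pre-built manifest containing test certificates):

```bash
# Hypothetical sketch: create a TLS secret named ingress-certs in the l5d-system
# namespace, with the standard tls.crt / tls.key data keys.
kubectl create secret tls ingress-certs --cert=tls.crt --key=tls.key -n l5d-system
```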
Note that there’s no need to specify a TLS section as part of the ingress resource: Linkerd doesn’t implement that section of the resource. All TLS configuration happens as part of the `l5d-config` ConfigMap. - -The Linkerd config remains largely unchanged, save updating the server port to `443`and adding TLS file paths: - -```bash -... -servers: -- port: 443 - ip: 0.0.0.0 - clearContext: true - tls: - certPath: /io.buoyant/linkerd/certs/tls.crt - keyPath: /io.buoyant/linkerd/certs/tls.key -... -``` - -The l5d DaemonSet now mounts a secret volume with the expected name: `ingress-certs` - -```bash -spec: - volumes: - - name: certificates - secret: - secretName: ingress-certs - ... - containers: - - name: l5d - ... - ports: - - name: tls - containerPort: 443 - hostPort: 443 - ... - volumeMounts: - - name: "certificates" - mountPath: "/io.buoyant/linkerd/certs" - readOnly: true - ... -``` - -And the updated Service config exposes port `443`. A reminder that the certificates we’re using here are for testing purposes only! Create the Secret, delete the DaemonSet and ConfigMap, and re-apply the ingress controller config: - -```bash -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/ingress-certificates.yml -n l5d-system -kubectl delete ds/l5d configmap/l5d-config -n l5d-system -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-tls-ingress-controller.yml -n l5d-system -``` - -You should now be able to make an encrypted request: - - -```bash -# Example requires this development cert: https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/certificates/cert.pem -# The cert expects "hello.world" host, so we add an /etc/hosts entry, eg: -# 104.198.196.230 hello.world -# where "104.198.196.230" is the ip stored in $L5D_SVC_IP -$ curl --cacert cert.pem -H "Host: world.v2" https://hello.world -$ earth (10.0.1.5)! -``` - - -## CONCLUSION - -Linkerd provides a ton of benefits as an edge router. In addition to the dynamic routing and TLS termination described in this post, it also [pools connections](https://en.wikipedia.org/wiki/Connection_pool), [load balances dynamically](/2016/03/16/beyond-round-robin-load-balancing-for-latency/) , [enables circuit breaking](/2017/01/14/making-microservices-more-resilient-with-circuit-breaking/) , and supports [distributed tracing][part-vii]. Using the Linkerd ingress controller and the [Kubernetes configuration](https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd-ingress-controller.yml) referenced in this post, you gain access to all these features in an easy to use, Kubernetes-native approach. Best of all, this method works seamlessly with the rest of the service mesh, allowing for operation, visibility, and high availability in virtually any cloud architecture. - -The [ingress identifier is new](https://github.com/linkerd/linkerd/pull/1116), so we’d love to get your thoughts on what features you want from an ingress controller. You can find us in the [Linkerd community Slack](https://slack.linkerd.io/) or on the [Linkerd Support Forum](https://linkerd.buoyant.io/). - -### ACKNOWLEDGEMENTS - -Big thanks to [Alex Leong](https://twitter.com/adlleong) and [Andrew Seigner](https://twitter.com/siggy) for feedback on this post. 
- -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-x-the-service-mesh-api.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-x-the-service-mesh-api.md deleted file mode 100644 index 323167cb55..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-x-the-service-mesh-api.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-x-the-service-mesh-api' -title: 'A Service Mesh For Kubernetes, Part X: The Service Mesh API' -aliases: - - /2017/05/24/a-service-mesh-for-kubernetes-part-x-the-service-mesh-api/ -author: 'alex' -date: Wed, 24 May 2017 23:36:26 +0000 -thumbnail: /uploads/kubernetes10_featured_Twitter_ratio.png -draft: false -featured: false -tags: [Linkerd, linkerd, News, tutorials] ---- - -As part of our Linkerd 1.0 release last month, we snuck in something that a few people have picked up on—Linkerd’s *service mesh API*. With the 1.0 release happily out of the way, we thought we’d take a moment to explain what this API does and what it means for the future of Linkerd. We’ll also show off one of the upcoming features of this API—dynamic control over Linkerd’s per-service *communications policy*. - -## THE LINKERD SERVICE MESH - -This morning at [Gluecon](http://gluecon.com/), Buoyant CTO [Oliver Gould](https://twitter.com/olix0r) delivered a keynote entitled **The Service Mesh**. In this keynote, he outlined the vision of the service mesh, as exemplified by [Linkerd](https://linkerd.io/). While Linkerd is often added to systems built on Kubernetes for its ability to add *resiliency*, the full vision of the service mesh is much more than that. As William Morgan writes in his blog post, [What’s a Service Mesh?](/2017/04/25/whats-a-service-mesh-and-why-do-i-need-one/): - -> The explicit goal of the service mesh is to move service communication out of the realm of the invisible, implied infrastructure, and into the role of a first-class member of the ecosystem—where it can be monitored, managed and controlled. - -For Linkerd, this means that every aspect of its behavior should be not only instrumented and observable, but also *controllable* at runtime. And ideally, this mutability should take place, not via config file edits and hot reloading, but via a unified and well-designed runtime API. - -This is, in short, the purpose of Linkerd’s service mesh API. 
To that end, we’ve introduced the [`io.l5d.mesh` interpreter](https://linkerd.io/config/1.0.0/linkerd/index.html#namerd-mesh) and [a new gRPC API for Namerd](https://linkerd.io/config/1.0.0/namerd/index.html#grpc-mesh-interface). Together, these provide the ability to dynamically control routing policy, and form the core of Linkerd’s service mesh API. This is a first step towards the eventual goal of providing a unified, global model of control over every aspect of Linkerd’s behavior. - -Linkerd 1.0 also introduced a new type of policy that *isn’t* yet exposed via the service mesh API—per-service *communications policy*. In this post, we’ll show how to configure this policy today, and we’ll describe the future work needed to add this control to Linkerd’s service mesh API. - -This article is one of a series of articles about [Linkerd](https://linkerd.io/), [Kubernetes](https://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) (this article) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -## COMMUNICATIONS POLICY - -Linkerd’s new per-service *communications policy* is an oft-requested feature. Communications policy encompasses many different aspects of how Linkerd proxies a request, including: how long should we wait for a service to process a request before timing out? What kinds of requests are safe to retry? Should we encrypt communication with TLS and which certificates should we use? And so on. - -Let’s take a look at how this policy can be used today, with the example of two services that have wildly different latencies. - -Starting from a fresh Kubernetes cluster, let’s deploy two services with different latencies. We can deploy the `hello world` microservice that we’re familiar with from the other posts in this series, with one small tweak: the `hello` service will be configured to add `500ms` of artificial latency. 
- -```yaml -- name: service -image: buoyantio/helloworld:0.1.2 -args: -- "-addr=:7777" -- "-text=Hello" -- "-target=world" -- "-latency=500ms" -``` - -Deploy it to your Kubernetes cluster with this command: - -```bash -kubectl apply -f https://raw.githubusercontent.com/BuoyantIO/linkerd-examples/master/k8s-daemonset/k8s/hello-world-latency.yml -``` - -(Note that the examples in these blog posts assume Kubernetes is running in an environment like GKE, where external loadbalancer IPs are available, and no CNI plugins are being used. Slight modifications may be needed for other environments, for example Minikube or CNI configurations with Calico/Weave.) - -Our next step will be to deploy the Linkerd service mesh. We’d like to add a timeout so that we can abort (and potentially retry) requests that are taking too long, but we’re faced with a problem. The `world` service is fast, responding in less than `100ms`, but the `hello` service is slow, taking more than `500ms` to respond. If we set our timeout just above `100ms`, requests to the `world` service will succeed, but requests to the `hello` service are guaranteed to timeout. On the other hand, if we set our timeout above `500ms` then we’re giving the `world` service a much longer timeout than necessary, which may cause problems to *our* callers. - -To give each service an appropriate timeout, we can use Linkerd 1.0’s new fine-grained per-service configuration to set a separate communications policy for each service: - -```yaml -service: -kind: io.l5d.static -configs: -- prefix: /svc/hello - totalTimeoutMs: 600ms -- prefix: /svc/world - totalTimeoutMs: 100ms -``` - -This configuration establishes the following timeouts: - -{{< fig - alt="image" - title="image" - src="/uploads/2017/07/buoyant-k8s-hello-world-timeouts.png" >}} - -We can deploy the Linkerd service mesh with this configuration using this command: - -```bash -kubectl apply -f https://raw.githubusercontent.com/BuoyantIO/linkerd-examples/master/k8s-daemonset/k8s/linkerd-latency.yml -``` - -Once Kubernetes provisions an external LoadBalancer IP for Linkerd, we can test requests to both the `hello` and `world` services and make sure both are operating within their timeouts. - - -```bash -$ L5D_INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -$ curl $L5D_INGRESS_LB:4140 -H "Host: hello" -Hello (10.196.1.242) world (10.196.1.243)!! -$ curl $L5D_INGRESS_LB:4140 -H "Host: world" -world (10.196.1.243)!! -``` - - -(Note that the first few requests will be slower because they must establish connections and may time out. Subsequent requests should be successful.) - -We can also check that timeouts are being enforced by artificially increasing the latency of the `hello` and `world` services until they violate their timeouts. We’ll start by increasing the artificial latency of the `hello` service to `600ms`. Given that the timeout for the `hello` service is `600ms`, this leaves zero overhead for the `hello` service to do things like call the `world` service and any requests should therefore timeout: - -```bash -$ curl "$L5D_INGRESS_LB:4140/setLatency?latency=600ms" -X POST -H "Host: hello" -ok -$ curl $L5D_INGRESS_LB:4140 -H "Host: hello" -exceeded 600.milliseconds to unspecified while waiting for a response for the request, including retries (if applicable). Remote Info: Not Available -``` - -Similarly, we can add `100ms` of artificial latency to the `world` service which should cause all requests to the `world` service to violate the `100ms` timeout. 
- -```bash -$ curl "$L5D_INGRESS_LB:4140/setLatency?latency=100ms" -X POST -H "Host: world" -ok -$ curl $L5D_INGRESS_LB:4140 -H "Host: world" -exceeded 100.milliseconds to unspecified while waiting for a response for the request, including retries (if applicable). Remote Info: Not Available -``` - -Success! We’ve set appropriate timeouts for each service, and demonstrated the expected behavior when these timeouts are (and are not) violated. - -In this example, we’ve only been configuring timeouts, but, as you might expect, this same pattern can be used to configure any kind of per-service communications policy, including [response classification](https://linkerd.io/config/1.0.0/linkerd/index.html#http-response-classifiers) or [retry budgets](https://linkerd.io/config/1.0.0/linkerd/index.html#retries). - -## LOOKING FORWARD - -In this post, we’ve seen an example of using Linkerd’s new per-service communications policy to handle two services with wildly different expected latencies. The introduction of per-service communications policy solves some immediate use cases for Linkerd users. But what we’ve seen here is just the beginning of communications policy control in Linkerd—this policy was developed from the ground up in a way that it can be dynamically updatable, with the explicit goal of making it a part of the service mesh API. - -In the coming months, we’ll add this communications policy to Linkerd’s service mesh API, alongside routing policy. Looking still further, other forms of policy—including [rate limiting](https://github.com/linkerd/linkerd/issues/1006), [request forking policy](https://github.com/linkerd/linkerd/issues/1277), and [security policy](https://github.com/linkerd/linkerd/issues/1276)—are all on [the Linkerd roadmap](https://github.com/linkerd/linkerd/projects/3), and will form more of Linkerd’s service mesh API. A consistent, uniform, well-designed service mesh API with comprehensive control over Linkerd’s runtime behavior is central to our vision of Linkerd as the service mesh for cloud native applications. - -There’s a lot of very exciting work ahead of us and it won’t be possible without input and involvement from the amazing Linkerd community. Please comment on an issue, discuss your use case on the [Linkerd Support Forum](https://linkerd.buoyant.io/), hit us up on [Slack](https://slack.linkerd.io/), or—best of all—submit a [pull request](https://github.com/linkerd/linkerd/pulls)! diff --git a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-xi-egress.md b/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-xi-egress.md deleted file mode 100644 index 3ff665e2d6..0000000000 --- a/linkerd.io/content/blog/a-service-mesh-for-kubernetes-part-xi-egress.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -slug: 'a-service-mesh-for-kubernetes-part-xi-egress' -title: 'A Service Mesh For Kubernetes Part XI: Egress' -aliases: - - /2017/06/20/a-service-mesh-for-kubernetes-part-xi-egress/ -author: 'alex' -date: Tue, 20 Jun 2017 23:36:51 +0000 -draft: false -featured: false -thumbnail: /uploads/kubernetes11_featured_Twitter_ratio.png -tags: [Linkerd, linkerd, News, tutorials] ---- - -In previous posts in this series, we’ve demonstrated how Linkerd can act as an *ingress* to a Kubernetes cluster, handling all requests coming from outside of the cluster and sending them to the appropriate Kubernetes services. 
- -In this post we’ll explore how Linkerd can be used as an *egress* as well, handling requests from services within the cluster to services running outside of the cluster, whether those are legacy non-Kubernetes systems or third-party APIs outside the firewall. - -Using the Linkerd service mesh for egress gives you a uniform, consistent model of request handling independent of where those requests are destined. It also lets you apply the benefits of Linkerd, such as adaptive load balancing, observability, circuit breakers, dynamic routing, and TLS, to services which are running outside of Kubernetes. - -This article is one of a series of articles about [Linkerd](https://linkerd.io/), [Kubernetes](https://kubernetes.io/), and service meshes. Other installments in this series include: - -1. [Top-line service metrics]({{< ref - "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) -2. [Pods are great, until they’re not]({{< ref - "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}}) -3. [Encrypting all the things]({{< ref - "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}}) -4. [Continuous deployment via traffic shifting]({{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}}) -5. [Dogfood environments, ingress, and edge routing]({{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}}) -6. [Staging microservices without the tears]({{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}}) -7. [Distributed tracing made easy]({{< ref - "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}}) -8. [Linkerd as an ingress controller]({{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}}) -9. [gRPC for fun and profit]({{< ref - "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}}) -10. [The Service Mesh API]({{< ref - "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}}) -11. [Egress]({{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}}) (this article) -12. Retry budgets, deadline propagation, and failing gracefully -13. Autoscaling by top-line metrics - -## EGRESS NAMING WITH DNS - -Linkerd provides a uniform naming abstraction that encompasses many different service discovery systems, including Kubernetes, Marathon, Consul, and ZooKeeper, as well as DNS and raw IP address resolution. When a service asks Linkerd to route a request to “foo”, Linkerd can be configured to resolve the name “foo” in a variety of ways, including arbitrary combinations of any of the above options. (For more on this, read about Linkerd’s powerful and sophisticated routing languages called [dtabs](https://linkerd.io/in-depth/dtabs/).) - -In Kubernetes terms, most egress names resolve to non-Kubernetes services and must be resolved via DNS. Thus, the most straightforward way to add egress to the Linkerd service mesh is to add DNS lookup as a fallback mechanism. To accomplish this, we’ll start with our standard service mesh configuration, but tweak it with an additional rule: if we get a request for a service that doesn’t exist in Kubernetes, we’ll treat the service name as an external DNS name and send the request to that external address. - -In the following sections, we’ll talk about how this actually works in terms of Linkerd’s configuration. If you just want to play with the end result, jump right to the “Trying it out” section at the bottom. 
- -## SPLITTING THE KUBERNETES NAMER - -There are a number of changes we need to make to the Linkerd config we’ve been developing in earlier examples to make this happen. - -In our basic service mesh config, we attached the DaemonSet transformer to the outgoing router’s interpreter. This was so that all requests from one service would be sent to the Linkerd DaemonSet pod of the destination service (read more about that in [Part II of this series][part-ii]). However, this is not appropriate for external services because they are running outside of Kubernetes and don’t have a corresponding Linkerd DaemonSet pod. Therefore, we must take the DaemonSet transformer off of the interpreter and put it directly on the `io.l5d.k8s namer`. This makes the DaemonSet transformer apply only to Kubernetes names and not to external ones. We must also add a second `io.l5d.k8s` namer without the DaemonSet transformer for the incoming router to use. - -```yaml -namers: -# This namer has the daemonset transformer "built-in" -- kind: io.l5d.k8s - prefix: /io.l5d.k8s.ds # We reference this in the outgoing router's dtab - transformers: - - kind: io.l5d.k8s.daemonset - namespace: default - port: incoming - service: l5d -# The "basic" k8s namer. We reference this in the incoming router's dtab -- kind: io.l5d.k8s -``` - -## UPDATING THE DTAB - -With those namers in place, we can now update the outgoing dtab to use the DaemonSet transformed Kubernetes namer and add dtab fallback rules to treat the service name as a DNS name. We use the io.buoyant.portHostPfx rewriting namer to extract the port number from the hostname (or use 80 by default if unspecified). - -```yaml -dtab: | -/ph => /$/io.buoyant.rinet ; # Lookup the name in DNS -/svc => /ph/80 ; # Use port 80 if unspecified -/srv => /$/io.buoyant.porthostPfx/ph ; # Attempt to extract the port from the hostname -/srv => /#/io.l5d.k8s.ds/default/http ; # Lookup the name in Kubernetes, use the linkerd daemonset pod -/svc => /srv ; -/svc/world => /srv/world-v1 ; -``` - -Recall that later dtab entries have higher priority so this will prefer: - -1. The linkerd daemonset pod of the Kubernetes service, if it exists -2. An external DNS service on the specified port -3. An external DNS service on port 80 if no port specified - -{{< fig - alt="egress" - title="Egress" - src="/uploads/2017/07/buoyant-k8s-egress-dtab.png" >}} - -## DON’T FORGET TLS! - -Most services running on the open internet don’t allow plain HTTP. We’ll use Linkerd’s fine-grained client configuration to add TLS to all egress requests that use port 443. - -```yaml -client: -kind: io.l5d.static -configs: -- prefix: "/$/io.buoyant.rinet/443/{service}" - tls: - commonName: "{service}" -``` - -Putting all that together gives us [this config](https://github.com/linkerd/linkerd-examples/blob/master/k8s-daemonset/k8s/linkerd-egress.yaml). Let’s try it out. - -## TRYING IT OUT - -Deploy our usual `hello world` microservice and updated Linkerd service mesh using these commands: - -```bash -kubectl apply -f https://raw.githubusercontent.com/BuoyantIO/linkerd-examples/master/k8s-daemonset/k8s/hello-world.yml -kubectl apply -f https://raw.githubusercontent.com/BuoyantIO/linkerd-examples/master/k8s-daemonset/k8s/linkerd-egress.yaml -``` - -Once Kubernetes provisions an external LoadBalancer IP for Linkerd, we can test requests to the `hello` and `world` services as well as external services running outside of Kubernetes. 
- -```bash -L5D_INGRESS_LB=$(kubectl get svc l5d -o jsonpath="{.status.loadBalancer.ingress[0].*}") -``` - -A request to a Kubernetes service: - -```bash -$ curl $L5D_INGRESS_LB:4140 -H "Host: hello" -Hello (10.196.1.242) world (10.196.1.243)!! -``` - -A request to an external service, using port 80 by default: - -```bash -$ curl -sI $L5D_INGRESS_LB:4140/index.html -H "Host: linkerd.io" | head -n 1 -HTTP/1.1 301 Moved Permanently -``` - -A request to an external service using an explicit port and HTTPS: - -```bash -$ curl -sI $L5D_INGRESS_LB:4140/index.html -H "Host: linkerd.io:443" | head -n 1 -HTTP/1.1 200 OK -``` - -## CAVEAT - -In the above configuration, we assume that the Linkerd DaemonSet pods are able to route to the external services in the first place. If this is not the case, e.g. if you have strict firewall rules that restrict L3/L4 traffic, you could instead set up a dedicated egress cluster of Linkerd instances running on nodes with access to the external services. All egress requests would then need to be sent to the egress cluster. - -## CONCLUSION - -By using Linkerd for egress, external services are able to share the same benefits that services running inside of Kubernetes get from the Linkerd service mesh. These include adaptive load balancing, circuit breaking, observability, dynamic routing, and TLS initiation. Most importantly, Linkerd gives you a uniform, consistent model of request handling and naming that’s independent of whether those requests are destined for internal services, or for external, third-party APIs. - -If you have any questions about using Linkerd for egress, please come ask on the [Linkerd Support Forum](https://linkerd.buoyant.io/) or [Slack](https://slack.linkerd.io/)! - -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} diff --git a/linkerd.io/content/blog/announcing-conduit-0-1-1.md b/linkerd.io/content/blog/announcing-conduit-0-1-1.md deleted file mode 100644 index d6a3816c2c..0000000000 --- a/linkerd.io/content/blog/announcing-conduit-0-1-1.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -slug: 'announcing-conduit-0-1-1' -title: 'Announcing Conduit 0.1.1' -aliases: - - /2017/12/20/announcing-conduit-0-1-1/ -author: 'gmiranda23' -date: Wed, 20 Dec 2017 23:11:11 +0000 -draft: false -featured: false -thumbnail: /uploads/version_conduit_011.png -tags: [conduit, Conduit, Release Notes, releases] ---- - -Conduit is now part of Linkerd! 
[Read more >]({{< relref -"conduit-0-5-and-the-future" >}}) - -I'm excited to announce that the first inaugural [post-launch release of Conduit](https://github.com/runconduit/conduit/releases/tag/v0.1.1) is now available. - -We've been blown away by your feedback and we're working hard to quickly help you with the problems you've told us matter most. This release is focused on making it easier to get started with Conduit and on better supporting your existing applications. In the same way you've come to expect a steady stream of [rapid Linkerd releases](https://github.com/linkerd/linkerd/releases), we're gearing up to also do that for Conduit. Two weeks after launch and we're shipping new features! - -With this release, Conduit can now be installed on Kubernetes clusters using RBAC. Conduit can also now support existing gRPC and HTTP/2 applications that communicate with other non-HTTP/2 or non-gRPC services. Use the `--skip-outbound-ports` flag to bypass proxying for specific outbound ports when setting up individual services you want Conduit to manage with the `conduit inject` command. - -In addition to new features, several existing features have been enhanced. Output from the `conduit tap` command has been reformatted to make it easier to parse with common UNIX command line utilities. Service calls can now be routed without the use of a fully-qualified domain name, meaning you can make relative lookups like those supported by default in kube-dns. The Conduit console has been updated to better support both large deployments and deployments that don't have any inbound or outbound traffic. - -Thanks for all your awesome suggestions and keep them coming! A great way to tell us what you think is by [opening issues via Github](https://github.com/runconduit/conduit) or by joining the [Linkerd Slack group](http://linkerd.slack.com) and popping into #conduit to talk to us directly. Try out the new Conduit for yourself with our [getting started guide](https://conduit.io/getting-started/). diff --git a/linkerd.io/content/blog/announcing-conduit-0-3-conduit-roadmap.md b/linkerd.io/content/blog/announcing-conduit-0-3-conduit-roadmap.md deleted file mode 100644 index de9a108d90..0000000000 --- a/linkerd.io/content/blog/announcing-conduit-0-3-conduit-roadmap.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -slug: 'announcing-conduit-0-3-conduit-roadmap' -title: 'Announcing Conduit 0.3 and the Conduit Roadmap' -author: 'william' -date: Wed, 21 Feb 2018 20:47:41 +0000 -draft: false -featured: false -thumbnail: /uploads/version_conduit_030.png -tags: [conduit, Conduit, News, Release Notes] ---- - -Conduit is now part of Linkerd! [Read more >]({{< ref -"conduit-0-5-and-the-future" >}}) - -Today we’re very happy to announce the release of Conduit 0.3! With this release, Conduit moves from _experimental_ to _alpha_---meaning that we’re ready for some serious testing and vetting from you. [Full release notes are here](https://github.com/runconduit/conduit/releases/tag/v0.3.0). - -Conduit 0.3 focuses heavily on production hardening of Conduit’s telemetry system, which automatically measures and aggregates service-level success rates, latency distributions, and request volumes. Conduit should “just work” for most apps on Kubernetes 1.8 or 1.9 without configuration, and should support Kubernetes clusters with hundreds of services, thousands of instances, and hundreds of RPS per instance. 
Conduit publishes top-line metrics for most HTTP/1.x and gRPC services without any configuration, and if you have your own Prometheus cluster, you can now also [export those metrics](https://conduit.io/prometheus) to it directly. - -Conduit 0.3 also features _load aware_ request-level load balancing, by which Conduit automatically sends requests to service instances with the fewest pending requests. This should improve application performance compared to the default layer 4 load balancing in Kubernetes, especially for applications under load. - -Most importantly, as of 0.3, we’re opening up Conduit development and planning. We’ve published the much-requested [Conduit roadmap](https://conduit.io/roadmap/), and we’re tracking upcoming [issues and milestones](https://github.com/runconduit/conduit/milestones) in GitHub. We’ve also launched new mailing lists: [conduit-users](https://groups.google.com/forum/#!forum/conduit-users), [conduit-dev](https://groups.google.com/forum/#!forum/conduit-dev), and [conduit-announce](https://groups.google.com/forum/#!forum/conduit-announce), which we’ll be using to plan and coordinate Conduit development. - -We hope these changes will make it even easier to get involved. If you want to participate, please subscribe to the mailing lists above, get familiar with the [Conduit README](https://github.com/runconduit/conduit/blob/master/README.md), and check out the [GitHub issues marked “help wanted”](https://github.com/runconduit/conduit/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22)! - -Finally, while Conduit is now in alpha, we don’t expect it to stay that way for long. We’re making a concerted effort to make Conduit production-ready as rapidly as possible. Of course, this all depends on you. [Try Conduit](https://conduit.io/) on your own Kubernetes apps, give us feedback, and help us get there! diff --git a/linkerd.io/content/blog/announcing-conduit-support-http-1-x-tcp.md b/linkerd.io/content/blog/announcing-conduit-support-http-1-x-tcp.md deleted file mode 100644 index 747eea7e6f..0000000000 --- a/linkerd.io/content/blog/announcing-conduit-support-http-1-x-tcp.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -slug: 'announcing-conduit-support-http-1-x-tcp' -title: 'Announcing Conduit support for HTTP/1.x and TCP' -aliases: - - /2018/02/01/announcing-conduit-support-http-1-x-tcp/ -author: 'gmiranda23' -date: Thu, 01 Feb 2018 16:29:10 +0000 -draft: false -featured: false -thumbnail: /uploads/version_conduit_020.png -tags: [conduit, Conduit, HTTP/1, News, Release Notes, TCP] ---- - -Conduit is now part of Linkerd! [Read more >]({{< relref -"conduit-0-5-and-the-future" >}}) - -We’re happy to announce that the latest Conduit release delivers on a big project milestone. With the 0.2.0 release, Conduit now includes support for HTTP/1.x and TCP traffic in addition to the existing HTTP/2 support. That means Conduit can now support most of your Kubernetes applications right out of the box. - -One of the things we’re most excited about in this release is the continued progress we’re making on simplifying the process of getting started with a service mesh. Conduit focuses on minimizing the level of effort needed to make a service mesh operable by aiming for a “zero config” approach to management. In other words, you shouldn’t have to get bogged down in configuration options just to get the basic visibility, management, and control you need. - -In the 0.2.0 release, you’ll notice that Conduit isn’t entirely zero config yet, but we’re getting pretty close. 
Some services still need manual configuration for now. Notably, if you’re using WebSockets or a protocol where the server sends traffic prior to the client (e.g. non-TLS encrypted MySQL or SMTP connections), you still need to manage those exceptions with some config. You can find more details in the [release notes](https://github.com/runconduit/conduit/releases/tag/v0.2.0). - -Conduit is still in alpha, so there’s a lot of work to be done before it’s ready for production workloads. But the development pace for Conduit is exceeding our expectations and we have even better things for you around the corner in the upcoming 0.3 milestone. Stay tuned for announcements on what to expect as Conduit heads down the road to production. - -[Try it for yourself](https://conduit.io/getting-started/) and let us know what you think! Join us in the #conduit channel in the [Linkerd Slack group](http://linkerd.slack.com) to chat with us directly. Keep the feedback coming! We’re thrilled with the response to Conduit so far and we’re excited to keep up with your suggestions. diff --git a/linkerd.io/content/blog/announcing-linkerd-1-0.md b/linkerd.io/content/blog/announcing-linkerd-1-0.md deleted file mode 100644 index 7395c791df..0000000000 --- a/linkerd.io/content/blog/announcing-linkerd-1-0.md +++ /dev/null @@ -1,205 +0,0 @@ ---- -slug: 'announcing-linkerd-1-0' -title: 'Announcing Linkerd 1.0' -aliases: - - /2017/04/25/announcing-linkerd-1-0/ -author: 'oliver' -date: Tue, 25 Apr 2017 23:36:00 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_version_1_featured.png -tags: [Linkerd, linkerd, News] ---- - -Today, we’re thrilled to announce Linkerd version 1.0. A little more than one year from our initial launch, Linkerd is part of the [Cloud Native Computing Foundation](https://cncf.io/) and has a thriving community of contributors and users. Adopters range from startups like Monzo, which is disrupting the UK banking industry, to high scale Internet companies like [Paypal](https://paypal.com/), [Ticketmaster](https://ticketmaster.com/), and [Credit Karma](https://creditkarma.com/), to companies that have been in business for hundreds of years like Houghton Mifflin Harcourt. - -A 1.0 release is a meaningful milestone for any open source project. In our case, it’s a recognition that we’ve hit a stable set of features that our users depend on to handle their most critical production traffic. It also signals a commitment to limiting breaking configuration changes moving forward. - -It’s humbling that our little project has amassed such an amazing group of operators and developers. I’m continually stunned by the features and integrations coming out of the Linkerd community; and there’s simply nothing more satisfying than hearing how Linkerd is helping teams do their jobs with a little less fear and uncertainty. - -## THE SERVICE MESH - -Linkerd is a *service mesh* for cloud native applications. As part of this release, we wanted to define what this actually meant. My cofounder William Morgan has a writeup in another post we released today, [What’s a service mesh? And why do I need one?](/2017/04/25/whats-a-service-mesh-and-why-do-i-need-one/) - -## NEW FEATURES - -Beyond stability and performance improvements, Linkerd 1.0 has a couple new features worth talking about. - -This release includes a substantial change to the way that routers are configured in Linkerd. New plugin interfaces have been introduced to allow for much finer-grained policy control. 
- -### PER-SERVICE CONFIGURATION - -There is a new section in the router config called `service` where service-level parameters may be configured. This parallels the `client` sections where client-level parameters are configured. The current parameters that can be specified in the `service` section are: - -- `totalTimeoutMs` -- `retries` -- `responseClassifier` - -```yml -routers: - - protocol: http - service: - totalTimeoutMs: 200 - retries: - budget: - minRetriesPerSec: 5 - percentCanRetry: 0.5 - ttlSecs: 15 - backoff: - kind: jittered - minMs: 10 - maxMs: 10000 - responseClassifier: - kind: io.l5d.http.retryableRead5XX -``` - -With this change, a router now has three main subsections: - -- `servers` — where the router listens for incoming requests -- `service` — the logical destination of the request based on the identifier -- `client` — where the router sends outgoing requests to concrete destinations - -### PER-CLIENT CONFIGURATION - -Prior to version 1.0 any client configuration such as timeouts or TLS would apply globally to all clients. We now support the ability to configure clients in a more granular way by specifying `kind: io.l5d.static` in the client section and providing a list of configs. For example: - -```yml -routers: - - protocol: http - client: - kind: io.l5d.static - configs: - - prefix: / - requestAttemptTimeoutMs: 1000 - failFast: true - - prefix: /#/io.l5d.k8s/default/http/hello - requestAttemptTimeoutMs: 300 - - prefix: /#/io.l5d.k8s/default/http/world - failureAccrual: - kind: none - failFast: false -``` - -Each item in the list of configs must specify a prefix and some parameters. Those parameters will apply to all clients with an id that matches the prefix. In the example above, the first config with prefix `/` applies to all clients. The next two configs apply to the `hello` and `world` clients respectively. If a client matches more than one config, all matching configs will be applied with configs later in the file taking precedence over earlier ones. For example, the `hello` client overrides the `requestAttemptTimeoutMs`property to `300` whereas the `world` client inherits the `1000` value from the first config. - -If you don’t specify `kind: io.l5d.static` then `kind: io.l5d.global` will be assumed and you can specify client configuration directly on the client object which will apply globally to all clients. - -```yml -routers: - - protocol: http - client: - requestAttemptTimeoutMs: 1000 - failFast: true -``` - -This same fine-grained level of control applies to the new `service` section as well. In the `service` configs, the `prefix` is compared to the service name i.e. the name produced by the identifier (which typically starts with `/svc`). - -```yml -routers: - - protocol: http - service: - kind: io.l5d.static - configs: - - prefix: /svc - totalTimeout: 1000 - responseClassifier: - kind: io.l5d.http.retryableRead5XX - - prefix: /svc/hello - responseClassifier: - kind: io.l5d.http.nonRetryable5XX - - prefix: /svc/world - totalTimeout: 300 -``` - -## UPGRADING GUIDE - -There are a couple changes you’ll have to make to your config files to move from pre-1.0 to 1.0. - -### IDENTIFIER KINDS - -The following identifier kinds have been renamed for consistency: - -- The `io.l5d.headerToken` id has been renamed to `io.l5d.header.token`. -- The `io.l5d.headerPath` id has been renamed to `io.l5d.header.path`. -- The `io.l5d.h2.ingress` id has been renamed to `io.l5d.ingress`. -- The `io.l5d.http.ingress` id has been renamed to `io.l5d.ingress`. 
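For example, applying the first of these renames, a pre-1.0 config that used the header token identifier would be updated like so (a minimal sketch showing only the identifier stanza; the surrounding router options are unchanged):

```yml
routers:
  - protocol: http
    identifier:
      # pre-1.0: kind: io.l5d.headerToken
      kind: io.l5d.header.token
```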
- -### RESPONSE CLASSIFIER KINDS - -The following response classifier kinds have been renamed for consistency: - -- The `io.l5d.nonRetryable5XX` id has been renamed to `io.l5d.http.nonRetryable5XX`. -- The `io.l5d.retryableRead5XX` id has been renamed to `io.l5d.http.retryableRead5XX`. -- The `io.l5d.retryableIdempotent5XX` id has been renamed to `io.l5d.http.retryableIdempotent5XX`. - -### CLIENT AND SERVICE PARAMETERS - -The following parameter have moved or been renamed: - -- `failFast` moved from router to client -- `responseClassifier` moved from router to service -- `retries` moved from client to service -- `timeoutMs` moved from router to - - `requestAttemptTimeoutMs` in client - - `totalTimeoutMs` in service - -### TIMEOUTS - -The `timeoutMs` property has been split into two properties, `requestAttemptTimeoutMs`which is configured in the `client` section and `totalTimeoutMs` which is configured in the `service` section. - -`requestAttemptTimeoutMs` configures the timeout for each individual request or retry. As soon as this timeout is exceeded, the current attempt is canceled. If the request is retryable and the retry budget is not empty, a retry will be attempted with a fresh timeout. - -`totalTimeoutMs` configures the total timeout for the request and all retries. A running timer is started when the first request is attempted and continues running if the request is retried. Once this timeout is exceeded, the request is canceled and no more retries may be attempted. - -### TLS - -The client TLS section no longer has a `kind` parameter and instead can simply be configured with these 3 parameters: - -| Key | Default Value | Description | -| ------------------- | ------------------------------------------ | ------------------------------------------------------------------ | -| `disableValidation` | false | Enable this to skip hostname validation (unsafe). | -| `commonName` | _required_ unless disableValidation is set | The common name to use for all TLS requests. | -| `trustCerts` | empty list | A list of file paths of CA certs to use for common name validation | - -Fine-grained client configuration can be used to only configure TLS for certain clients. Furthermore, segments from the prefix can be captured into variables and used in the `commonName`. For example: - -```yml -routers: - - protocol: http - client: - kind: io.l5d.static - configs: - - prefix: /#/io.l5d.k8s/default/http/{service} - tls: - commonName: '{service}.linkerd.io' -``` - -### METRICS - -The following metrics scopes have changed names. You will need to update any consumers of these metrics such as dashboards or alerts. - -- `rt/*/dst/id` has changed to `rt/*/service` -- `rt/*/dst/path` has changed to `rt/*/client` -- `rt/*/dst/id/*/path` has changed to `rt/*/client/*/service` -- `rt/*/srv` has changed to `rt/*/server` - -These three metrics scopes (server, service, client) mirror the three main subsections of the router config (servers, service, client). - -### TRACE ANNOTATIONS - -The following trace annotations have changed names: - -- `dst.id` has changed to `client` -- `dst.path` has changed to `residual` -- `namer.path` has changed to `service` - -### HTTP HEADERS - -The following outgoing request headers have changed names: - -- `l5d-dst-logical` has changed to `l5d-dst-service` -- `l5d-dst-concrete` has changed to `l5d-dst-client` - -## THANKS - -Linkerd is only possible thanks to the community of amazing people around it. 
I’d like to thank everyone who helps in [the Linkerd Slack](https://slack.linkerd.io/), files issues, and contributes pull requests. The 1.0 release was made possible by contributions from Amédée d’Aboville, Zack Angelo, Ian Macalinao, Alexander Pakulov, Jordan Taylor, and [users like you](https://github.com/linkerd/linkerd/blob/master/CONTRIBUTING.md)! diff --git a/linkerd.io/content/blog/announcing-the-linkerd-graalvm-working-group.md b/linkerd.io/content/blog/announcing-the-linkerd-graalvm-working-group.md deleted file mode 100644 index 4d67ca427e..0000000000 --- a/linkerd.io/content/blog/announcing-the-linkerd-graalvm-working-group.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -slug: 'announcing-the-linkerd-graalvm-working-group' -title: 'Announcing the Linkerd + GraalVM working group' -aliases: - - /2018/06/04/announcing-the-linkerd-graalvm-working-group/ -author: 'gmiranda23' -date: Mon, 04 Jun 2018 16:47:09 +0000 -thumbnail: /uploads/linkerd-graalvm-working-group.png -draft: false -featured: false -tags: [Linkerd, linkerd, News] ---- - -This Wednesday, we’re kicking off a new community working group to get Linkerd running on [GraalVM](https://www.graalvm.org/). The goal of the working group is to get Linkerd compiled into a native executable with GraalVM, which should result in a massive reduction in its memory footprint. Details below. - -GraalVM is a universal virtual machine for running applications written in many languages including JVM-based languages like Java and Scala. It includes a [native-image](http://www.graalvm.org/docs/reference-manual/aot-compilation/) tool that enables ahead-of-time (AOT) compilation of Java applications into native executables. As opposed to the JVM’s traditional just-in-time (JIT) approach of compiling code at run time, the AOT approach pre-compiles into efficient machine code. This should result in two big changes for Linkerd: faster startup times and a reduced memory footprint. Because AOT eliminates the need to include infrastructure to load and optimize code at runtime, a GraalVM Linkerd should require significantly less memory to run. There may also be additional advantages including more predictable performance and less total CPU usage. - -So far, it’s unclear exactly how big of an improvement we can expect to see upon successful completion of this work. Using similar techniques, the GraalVM team was able to get a [7x improvement in the memory footprint required for Netty](https://medium.com/graalvm/instant-netty-startup-using-graalvm-native-image-generation-ed6f14ff7692). The aim of the Linkerd+GraalVM working group is to answer this question by collaborating to make Linkerd work in a similar manner. - -An early proof of concept has been put together by [Georgi Khomeriki](https://github.com/flatmap13) (Walmart Labs). The [Linkerd+GraalVM working group will meet this Wednesday](https://lists.cncf.io/g/cncf-linkerd-graal-wg/message/16), June 6 from 8:00-9:00 (UTC-7) to review his work, identify next steps, and get started on this exciting new project. If you’d like to participate, you can join the group on [Google Hangouts](http://meet.google.com/gtz-htoa-mik) or dial-in via information on the [group invite](https://lists.cncf.io/g/cncf-linkerd-graal-wg/message/16). Hope to see you then! 
diff --git a/linkerd.io/content/blog/bringing-service-communication-out-of-the-shadows-pt-1.md b/linkerd.io/content/blog/bringing-service-communication-out-of-the-shadows-pt-1.md deleted file mode 100644 index 81486d807d..0000000000 --- a/linkerd.io/content/blog/bringing-service-communication-out-of-the-shadows-pt-1.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -slug: 'bringing-service-communication-out-of-the-shadows-pt-1' -title: 'Bringing Service Communication Out of the Shadows - Part 1' -aliases: - - /2017/10/26/bringing-service-communication-out-of-the-shadows-pt-1/ -author: 'gmiranda23' -date: Thu, 26 Oct 2017 08:05:40 +0000 -thumbnail: /uploads/shadows1_featured_Twitter_ratio.png -draft: false -featured: false -tags: [Buoyant, buoyant, Linkerd, News, Video] ---- - -It’s been an interesting few months for the service mesh. First, it’s now an ecosystem! [Istio 0.2](https://github.com/istio/istio/milestone/2) was just released, NGINX recently [launched its service mesh](https://www.nginx.com/blog/introducing-nginx-application-platform/), Uber is considering open-sourcing its [Catalyst service mesh](https://thenewstack.io/ubers-catalyst-service-mesh-provides-visibility-speed/), and [Envoy is now hosted by the CNCF](https://www.cncf.io/blog/2017/09/13/cncf-hosts-envoy/). At Buoyant, we’re encouraged by the recent surge in service mesh technology. - -Second, that surge validates our “service mesh” approach as a missing building block in the world of microservices. We’re encouraged to see the service mesh gain adoption. But the reality is that a lot of users out there still aren’t even entirely sure what problem a service mesh solves. - -In this series we’re going to unpack why this surge is happening now, take an in-depth look at the problems a service mesh solves, and look at where service mesh technology is going next. - -This is one article in a series of articles covering service mesh technology. Other installments in this series include: - -1. The problem we’re solving (this article) -2. [Making service requests visible]({{< ref - "bringing-service-communication-shadows-part-2" >}}) -3. Adding a layer of resilience -4. Security and implementation -5. Customer use cases -6. Future state & where the technology is going - -## Preface - -Before we get started, you should know what a service mesh is. If you don’t yet, you should check out a few links. Phil Calçado wrote an informative history of [the service mesh pattern](http://philcalcado.com/2017/08/03/pattern_service_mesh.html), Redmonk shared their [hot take on Istio and Linkerd](http://redmonk.com/jgovernor/2017/05/31/so-what-even-is-a-service-mesh-hot-take-on-istio-and-linkerd/), and—if you’re more the podcast type—The Cloudcast has an introduction to both [Linkerd](http://www.thecloudcast.net/2017/05/the-cloudcast-298-introduction-to.html?m=1) and [Istio](http://www.thecloudcast.net/2017/09/the-cloudcast-312-istio-routing-load.html?m=1). Collectively, those paint a pretty good picture. - -TL;DR, a service mesh is a dedicated infrastructure layer for handling service-to-service communication. The service mesh has two distinct components that each behave differently. The [data plane](https://medium.com/@mattklein123/the-universal-data-plane-api-d15cec7a) is the layer responsible for moving your data (aka requests) through your service topology in real time. When your apps make service-to-service calls, they are unaware of the data plane’s existence: it’s basically transparent. 
In practice, this layer is typically comprised of a series of interconnected proxies. - -A service mesh also provides a [control plane](https://medium.com/@mattklein123/the-universal-data-plane-api-d15cec7a): that’s where the magic happens. A control plane exposes new primitives you can use to control how your services communicate. Those primitives enable you to do some [fun things](https://istio.io/docs/tasks/) you couldn’t do before. - -When you (as a human) interact with a service mesh, you interact with the control plane. You use the new primitives to compose some form of policy: routing decisions, auth, rate limits, etc. The data plane reads your policies from the control plane and alters its behavior accordingly. - -That’s enough to get started. We’ll explore further in-depth later. - -Finally, this series focuses on the service mesh in a broad sense. This series isn’t a side-by-side feature comparison of specific tools. Unless absolutely necessary, I’ll skew toward using general examples instead of product-specific examples. - -## The problem - -Service-to-service communication lives in the shadows. There’s a lot we can infer about the state of service communication based on circumstantial evidence. Directly measuring the health of those requests at any given time is a challenge with no clear solution. - -You’re probably monitoring network performance stats somehow today. Those metrics tell you a lot about what’s happening in the lower level network layer: packet loss, transmission failures, bandwidth utilization, and so on. That’s important data. It tells you if your network is healthy. But it’s hard to infer anything about service-to-service communications from such low-level metrics. Directly monitoring the health of application service requests means looking further up the stack. - -You might use a latency monitoring tool—like [smokeping](http://www.smokeping.org)—to get closer to measuring service health. This breed of tools provide useful external availability data. But that’s an external view. If you notice suboptimal performance, troubleshooting issues means correlating those external measures with an internal data source like an application event stream log captured somewhere. You get closer to inferring how your services are behaving by triaging between data sources. But you still aren’t measuring service health directly. - -Using an in-band tool—like [tcpdump](http://www.tcpdump.org/)—gets you right into examining service communication where it happens: the packet level. Again, this is a low-level inspection tool, albeit a powerful one. To find the data that’s relevant, you have to filter out all normal traffic by looking for specific payloads, targets, ports, protocols, or other bits of known information. In any reasonable production setting, that’s a flood of data. You are searching for needles in a proverbial haystack. With enough sophisticated scaffolding, that search can be made more effective. Even then, you still need to correlate data from the types of tools named above to triage and infer the health of service-to-service communications. - -If you’ve managed production applications before, you probably know this dance well. And for a majority of us, these tools and tactics have mostly been good enough. Troubleshooting service communication can be an uncommon event since many monolithic applications make relatively few service requests and it’s often clear where they are coming from and going to. 
Investing time to create more elegant solutions to unearth what’s happening in that hidden layer simply hasn’t been worth it. - -Until microservices. - -## Managing requests for microservices - -When you start building out microservices, service-to-service communication becomes the default. In production, it’s not always clear where requests are coming from or where they’re going to. You become painfully aware of the service communication blindspot. - -Some development teams solve for that blindspot by building and embedding custom monitoring agents, control logic, and debugging tools into their service as communication libraries. And then they embed those into another service, and another, and another ([Jason McGee summarizes](http://www.thecloudcast.net/2017/09/the-cloudcast-312-istio-routing-load.html?m=1) this pattern well). - -The service mesh exists to decouple that communication logic from your applications. The service mesh provides the logic to monitor, manage, and control service requests by default, everywhere. It pushes that logic into a lower part of the stack where it can be more easily managed across your entire infrastructure. - -The service mesh doesn’t exist to manage parts of your stack that already have sufficient controls, like packet transport & routing at the TCP/IP level. The service mesh presumes that a usable (even if unreliable) network already exists. The scope of the service mesh is only that blind spot more easily seen by the shift to microservice architectures. - -If you’re asking yourself whether you need a service mesh, the first sign that you do is that you have a lot of services intercommunicating within your infrastructure. The second is that you have no direct way of determining the health of that intercommunication. Using only indirect measurements to infer what’s happening means you have a blindspot. You might have service requests failing right now and not even know it. - -The service mesh works for managing all service-to-service communication, but its value is particularly strong in the world of managing distributed cloud-native applications. - -## Visibility isn’t enough - -Shining a light into the darkness of service communication is the first step. Because the service mesh is implemented as a series of interconnected proxies, it makes sense to use that layer to directly measure and report the health of service-to-service communication. - -The two most common ways of setting up a service mesh (today) are to either deploy each proxy as a container sidecar, or deploy one proxy per physical host. Then, whenever your containerized applications make external service requests, they route through the new proxy. - -But visibility isn’t enough to run production microservices. Those services need to be resilient and secure. The implemented architecture of the service mesh also provides an opportunity to improve several problems where they occur. - -Before the service mesh, service communication logic has mostly been bundled into application code: open a socket, transmit data, retry if it fails, close the socket when you’re done, etc. By abstracting that logic and exposing primitives to control that behavior on an infrastructure level, you can decouple service communication from your applications. From a code perspective, all your apps now need to do is make a plain old external service call. - -On a global (or partial) infrastructure level, you can then decide how those communications occur. Should they all be TLS encrypted by default? 
If a service call fails, should it be retried, and if so, how often and for how long? Which critical service calls should be dynamically load-balanced to the most performant instances of a particular service? - -For example, the service mesh can simplify how you manage TLS certificates. Rather than baking those certs into every microservice application, you can handle that logic in the service mesh layer. Code all of your apps to make a plain HTTP call to external services. At the service mesh layer, specify the cert and encryption method to use when that call is transmitted over the wire and manage any exceptions on a per-service basis. When you eventually need to update certificates, you handle that at the service mesh layer without needing to change any application code. - -The service mesh both simplifies your apps and gives you finer-grained control. You push management of all service requests down into an organization-wide set of intermediary proxies (or a ‘mesh’) that inherit a common behavior with a common management interface. - -## Service communication as a first-class citizen - -The data plane shines a light into the previously dark realm of service-to-service communications to make them visible and measurable. The control plane then exposes ways to more easily manage and control the behavior of that communication. Together, they bring service-to-service communication up to the level where any mission-critical component of your infrastructure needs to be: managed, monitored, and controlled. - -Monolithic architectures have enabled service communication to live in the shadows for decades. But with microservices, that long-hidden problem is one we can’t continue to live with anymore. The service mesh turns service communication into a first-class citizen within your application infrastructure. - -We’ve laid out a few benefits of the service mesh in this article. In the next installment of this series, we’ll explore how the various features of the service mesh are implemented in practice. diff --git a/linkerd.io/content/blog/bringing-service-communication-shadows-part-2.md b/linkerd.io/content/blog/bringing-service-communication-shadows-part-2.md deleted file mode 100644 index e0e3e50605..0000000000 --- a/linkerd.io/content/blog/bringing-service-communication-shadows-part-2.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -slug: 'bringing-service-communication-shadows-part-2' -title: 'Bringing Service Communication Out of the Shadows - Part 2' -aliases: - - /2017/11/20/bringing-service-communication-shadows-part-2/ -author: 'gmiranda23' -thumbnail: /uploads/shadows2_featured_Twitter_ratio.png -date: Mon, 20 Nov 2017 21:23:46 +0000 -draft: false -featured: false -tags: - [ - Buoyant, - buoyant, - cloud-native, - Industry Perspectives, - Linkerd, - runtime, - service mesh, - ] ---- - -In this part of our series, we’re going to begin focusing on how the various features of a service mesh are implemented in practice. - -This is one article in a series of articles covering service mesh technology. Other installments in this series include: - -1. [The problem we’re solving]({{< ref - "bringing-service-communication-out-of-the-shadows-pt-1" >}}) -2. Making service requests visible (this article) -3. Adding a layer of resilience -4. Security and implementation -5. Customer use cases -6. Future state & where the technology is going - -In the first installment of this series, we covered why a service mesh exists and what problems it solves.
In the next few followup posts, we’ll explore how the service mesh is implemented in practice to highlight why specific concepts matter in the context of solving those problems. - -In this article, we look at how the service mesh introduces previously missing observability. - -## Making service traffic behavior visible - -The explicit goal of the service mesh is to move service communication out of the realm of invisible and implied infrastructure into the realm of first-class citizenship--where it can be monitored, controlled, and managed. - -Service communication has operated in the shadows, living in a blindspot enabled by the relative uniformity and predictability of traffic within monolithic applications. The shift to microservices makes living with that blindness unsustainable. When service-to-service communication becomes the default, it needs to be reliable. Cloud-native computing principles aim to run applications reliably on top of unreliable infrastructure: the underlying components can and will fail. When those failures occur, it should be easy to determine the cause and react appropriately. - -When running microservices in production, it’s not always clear where requests are coming from or where they’re going to. A plethora of data sources exist and their relationships are rarely defined and clear. Troubleshooting production service issues shouldn’t be an exercise in triaging observations from multiple sources and guesswork. What we need are tools that reduce that cognitive burden, not increase it. - -The first thing users typically get out of using a service mesh is observability where it previously didn’t exist: in the service communication layer. The service mesh helps developers and operators understand the distributed traffic flow of their applications. - -## Where observability occurs - -You’ll recall the [earlier distinction](https://buoyant.io/2017/10/26/bringing-service-communication-out-of-the-shadows-pt-1/) of the separate components in a service mesh: [the data plane and control plane](https://medium.com/@mattklein123/service-mesh-data-plane-vs-control-plane-2774e720f7fc). To summarize, the data plane touches every packet/request transmitted between services and it exposes primitives to specify desired behavior for things like service discovery, routing, retries, etc. The control plane is where you, as an operator, use those primitives to specify policy and configuration that informs how the data plane behaves. - -Some service mesh implementations pair separate products, like using Envoy (a data plane) with Istio (a control plane). Some service mesh implementations, like Linkerd, contain both a data plane and a control plane in one product. Confusingly for new users, those distinctions aren’t always clear since you can do things like take the data plane portion of Linkerd and integrate it with Istio. There’s more detail around those implementations that we’ll cover later. For now, the takeaway is that every service mesh solution needs both of these components. - -The data plane is not just where the data of service-to-service communication is exchanged, it’s also where telemetry data around that action is gathered. The service mesh gathers descriptive data about what it’s doing to provide observability at the wire level. Exactly which data is gathered varies between service mesh implementations. Generally you can expect to find top-line service metrics. - -Top-line service metrics are the measures you care about because they directly affect the business. 
It’s helpful to record bottom-line metrics like CPU and memory usage to triage events, but what should be triggering alerts are measures like a significant drop in success rates. In other words, while some metrics are useful for debugging, having anomalies in them isn’t what you want to be woken up about at 4am. The data plane is designed to observe measures like latency, request volume, response time, success/failure/retry counts, error types, load balancing statistics, and more: metrics that indicate when services are unhealthy or unavailable. - -The data plane can then be polled by external metrics-collection utilities for aggregation. In some implementations, the control plane may act as an intermediary aggregator by collecting and processing that data before sending it to backends like Prometheus, InfluxDB, or statsd. That data can then be presented in any number of ways, including the popular choice of displaying it via dashboards using Grafana. - -## Where you notice observability - -Dashboards help you visualize trends when troubleshooting by presenting aggregated data in easily digestible ways. Their presence is handy when using a service mesh, so they’re often included as an implementation component. But that can also be confusing to new users. Where does the dashboarding backend fit into service mesh architecture? - -Envoy is a data plane and it supports [using Grafana](https://medium.com/@mattklein123/lyfts-envoy-dashboards-5c91738816b1). Istio is a control plane and it supports using Grafana. And Linkerd, which is both a data plane and a control plane, also supports [using Grafana](https://github.com/linkerd/linkerd-viz). Are dashboards part of the data plane or the control plane? - -The truth is, they’re not strictly a part of either. - -When you (as a human) interact with a service mesh, you typically interact with the control plane. Because dashboards help humans digest aggregated data more easily, it makes contextual sense for them to sit next to where you interact with the system. For example, Istio includes that backend as the Istio dashboard add-on component, Linkerd includes that as the linkerd-viz add-on, and (in the earlier example) Envoy presumes you already have your own metrics-collection backend and dashboards set up somewhere else. - -Make no mistake: any dashboard, no matter where it’s implemented, is reading data that was observed in the data plane. That’s where observability occurs, even if you notice the results somewhere else. - -## Beyond service metrics - -The service mesh provides visibility in new ways by presenting detailed histograms and metrics that give you a consistent and global view of application performance. Those metrics are available in both machine-parsable and human-readable formats. But beyond service health metrics, the service mesh also provides another useful layer of visibility. - -Service communication can often span multiple endpoints. For example, a seemingly simple request to a profile service may require child requests to other services like auth, billing, and additional resources to fulfill. Those services may also have their own additional requests to make in order to fulfill the parent request. - -{{< fig - alt="sample request tree" - title="sample request tree" - src="/uploads/2019/03/request_tree@2x.png" >}} - -If a request to any of the underlying services fails, the client only knows that its request to the profile service failed, but not where or why.
External monitoring only exposes overall response time and (maybe) retries, but not individual internal operations. Those operations may be scattered across numerous logs, but a user interacting with the system may not even know where to look. In the above example, if there’s an intermittent problem with the audit service, there’s no easy way to tie that back to failures seen via the profile service unless an operator has clear knowledge of how the entire service tree operates. - -[Distributed tracing](https://opentracing.io/docs/) helps developers and operators understand the behavior of any application inside the service mesh. Requests routed by the data plane can be configured to trace every step (or “span”) they take on the way to being fulfilled. In other words, a trace is composed of spans, where each span corresponds to a service invoked by a request. - -The visualization above shows how these microservices fit together. But it doesn’t show time durations, parallelism, or dependent relationships. There’s also no way to easily show latency or other aspects of timing. A full trace span allows you to instead visualize every step required to fulfill a service request by correlating them in a manner like this: - -{{< fig - alt="sample request trace span" - title="sample request trace span" - src="/uploads/2019/03/response_trace@2x.png" >}} - -Each span corresponds to a service invoked during the execution of our request. Because the service mesh data plane proxies the calls to each underlying service, it’s already gathering data about each individual span like source, destination, latency, and response code. Without prerequisite knowledge of the entire system, you can then more easily determine the exact source of any present issues. - -While the service mesh is in a position to easily provide instrumentation and produce data about individual spans, you still need another system--like Zipkin, OpenTracing, or Jaeger--to collect them and assemble the full trace. The specifics of how that trace span is assembled depend on the underlying implementation, which varies between different service mesh data planes. - -While the hooks exist to capture this data, you should note that application code changes are required in order to use this functionality. Your apps need to propagate and forward the required HTTP headers so that when the data plane sends span information to the underlying telemetry, the spans can be untangled and correlated back into a single contiguous trace. - -## Visibility by default - -Without any extra work, just by deploying the service mesh you get immediate out-of-the-box visibility of health metrics. More detailed granularity to clearly see otherwise obscured steps performed by each request can be achieved by making the small header modifications required to use distributed tracing. For now, that covers core visibility concepts. Later in the series, we’ll dive further into details when we look at customer-specific use cases. - -Visibility for managing services is critical, but it’s also not enough. Your services also need resiliency. In the next installment of this series, we’ll explore how to use the primitives within a service mesh to improve your application’s resiliency.
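To make the "visibility by default" point concrete, here is a minimal sketch of what polling a data plane for metrics can look like, using a Linkerd 1.x DaemonSet as the example and assuming it runs in a `linkerd` namespace with its admin port on 9990 and a Prometheus telemeter enabled:

```bash
# Port-forward to one of the Linkerd data-plane pods (assumes the app=l5d label
# used by the Linkerd DaemonSet examples).
kubectl -n linkerd port-forward \
  $(kubectl -n linkerd get pod -l app=l5d -o jsonpath='{.items[0].metadata.name}') 9990 &

# Human-readable view: the admin dashboard at http://localhost:9990
# Machine-parsable views of the same data, suitable for scraping or ad-hoc checks:
curl -s http://localhost:9990/admin/metrics.json | head
curl -s http://localhost:9990/admin/metrics/prometheus | grep requests
```

A Prometheus server scraping that endpoint, with Grafana in front of it, is all it takes to turn the data plane's raw counters into the dashboards described above.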
diff --git a/linkerd.io/content/blog/conduit-0-4-0-wheres-my-traffic.md b/linkerd.io/content/blog/conduit-0-4-0-wheres-my-traffic.md deleted file mode 100644 index 66136560ff..0000000000 --- a/linkerd.io/content/blog/conduit-0-4-0-wheres-my-traffic.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -slug: 'conduit-0-4-0-wheres-my-traffic' -title: 'Conduit 0.4.0: Where’s my traffic?' -aliases: - - /2018/04/20/conduit-0-4-0-wheres-my-traffic/ -author: 'franzi' -date: Fri, 20 Apr 2018 13:58:01 +0000 -draft: false -featured: false -thumbnail: /uploads/version_conduit_040.png -tags: [conduit, Conduit, kubernetes, monitoring, News, Release Notes] ---- - -Conduit is now part of Linkerd! [Read more >]({{< relref -"conduit-0-5-and-the-future" >}}) - -Earlier this week, we released [Conduit v0.4.0](https://github.com/runconduit/conduit/releases/tag/v0.4.0). This release brings some significant improvements to the Prometheus-based telemetry system, and introduces some incredibly cool new tools for debugging microservices. - -Within 60 seconds of installation, Conduit now gives you preconfigured Grafana dashboards for every Kubernetes Deployment. These dashboards not only cover top-line metrics such as success rate, request volume, and latency distributions per service, but also _break these metrics down per dependency_. This means you can easily answer questions like “where is the traffic for this service coming from?” and “what’s the success rate of Foo when it’s calling Bar?”, without having to change anything in your application. - -{{< fig - alt="conduit dashboard" - title="conduit dashboard" - src="/uploads/2018/08/Pasted-image-at-2018_04_20-09_28-AM-1024x930.png" >}} - -## Dude, where’s my traffic? - -Some of the most critical questions to answer in a microservices app are around the runtime dependencies between services. It's one thing to have success rate per service; it's quite another to understand _where_ traffic to a service is coming from, and _which_ dependencies of a service are exhibiting failures. Typically, these questions are also the _hardest_ to answer. But we knew we had an opportunity to make it easy with Conduit. - -To accomplish this, in 0.4.0, we moved Conduit’s telemetry system to a completely pull-based (rather than push-based) model for metrics collection. As part of this, we also modified Conduit’s Rust proxy to expose granular metrics describing the source, destination, and health of all requests. A pull-based approach reduces complexity in the proxy, and fits better into the model of the world that Prometheus expects. - -As a result, Conduit’s telemetry system is now incredibly flexible. You can dive into request rate, success rate, and latency metrics between any two Kubernetes deployments, pods, or namespaces. You can write arbitrary Prometheus queries on the result. And, of course, we wire everything through to the CLI as well. "Top" for microservices, anyone? - -{{< fig - alt="where is my car" - src="/uploads/2018/04/dude-wheres-my-car.gif" >}} - -## How it works in practice - -Let’s walk through a brief example. (_For a full set of installation instructions, see the official_ [_Conduit Getting Started Guide_](https://conduit.io/getting-started/).)
- -First, install the Conduit CLI: - -```bash -curl https://run.conduit.io/install | sh -``` - -Next, install Conduit onto your Kubernetes cluster: - -```bash -conduit install | kubectl apply -f - -``` - -Finally, install the "emojivoto" demo app, and add it to the Conduit mesh: - -```bash -curl https://raw.githubusercontent.com/runconduit/conduit-examples/master/emojivoto/emojivoto.yml | conduit inject - | kubectl apply -f - -``` - -The demo app includes a `vote-bot` service that is constantly running traffic through the system. This AI-based bot is voting on its favorite emojis and is designed to slowly become more intelligent, and more cunning, over time. For safety reasons, we recommend you don’t let it run for more than a few days. ;-) Let's see how we can use Conduit to understand how traffic is flowing through the demo app. Start by seeing how the web service (technically, web Deployment) is doing: - -```bash -$ conduit stat -n emojivoto deployment web -NAME MESHED SUCCESS RPS LATENCY_P50 LATENCY_P95 LATENCY_P99 -web 1/1 90.00% 2.0rps 2ms 4ms 9ms -``` - -The success rate of requests is only 90%. There’s a problem here. But is it possible this is an upstream failure? We can find out by looking at the success rate of the services our \`web\` deployment talks to. - -```bash -$ conduit stat deploy --all-namespaces --from web --from-namespace emojivoto -NAMESPACE NAME MESHED SUCCESS RPS LATENCY_P50 LATENCY_P95 LATENCY_P99 -emojivoto emoji 1/1 100.00% 2.0rps 1ms 2ms 2ms -emojivoto voting 1/1 72.88% 1.0rps 1ms 1ms 1ms -``` - -Here you see that `web` talks to both the `emoji` and the `voting` services. The success rate of the calls to `emoji` is 100%, but to `voting` it’s only 72.88%. Note that this command is displaying the success rate **only from** `web` to `emoji`, and **only from** `web` to `voting`. The aggregate success rate of the `emoji` and `voting` services might be different. With just a bit of digging, we’ve determined that the culprit is probably the `voting` service. Who else talks to the `voting` service? To find out, we can run the following command: - -```bash -$ conduit stat deploy --to voting --to-namespace emojivoto --all-namespaces -NAMESPACE NAME MESHED SUCCESS RPS LATENCY_P50 LATENCY_P95 LATENCY_P99 -emojivoto web 1/1 83.33% 1.0rps 1ms 2ms 2ms -``` - -The `voting` service is only called from the `web` service. So, by tracing dependencies from `web`, we now have a plausible target for our first investigation: the `voting` service is returning an 83% success rate when `web` is calling it. From here, we might look into the logs, traces, or other forms of deeper investigation into this service. - -{{< fig - alt="sweet" - src="/uploads/2018/08/dude_sweet.gif" >}} - -That’s just a sample of some of the things you can do with Conduit. If you want to dive deeper, try looking at the success rate across all namespaces; success rate for a single namespace, broken down by deployments across all namespaces that call into that namespace; or success rate of Conduit components themselves. The possibilities are endless (kinda)! We’ve also recorded a brief demo so you can see this in action. - -{{< youtube R5UDKgX72tg >}} - -## What’s next? - -In terms of metrics and telemetry, we’ll be extending these semantics to other Kubernetes objects, such as Pods and ReplicaSets, in [upcoming releases](https://conduit.io/roadmap). We’ll also be making \`conduit tap\` work on these same objects, since \`tap\` and \`stat\` work beautifully together.
We might also just have another fun command or two waiting in the wings, ready to show off the power of Conduit’s new telemetry pipeline. Stay tuned! - -_Special thanks to [Frederic Branczyk](https://github.com/brancz) for invaluable Prometheus help._ diff --git a/linkerd.io/content/blog/conduit-0-5-and-the-future.md b/linkerd.io/content/blog/conduit-0-5-and-the-future.md deleted file mode 100644 index e770779cd6..0000000000 --- a/linkerd.io/content/blog/conduit-0-5-and-the-future.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -slug: 'conduit-0-5-and-the-future' -title: 'Conduit 0.5.0 and the future of Conduit' -aliases: - - /2018/07/06/conduit-0-5-and-the-future/ -author: 'oliver' -date: Fri, 06 Jul 2018 16:41:57 +0000 -thumbnail: /uploads/a6d4b0bd-conduit.jpg -draft: false -featured: false -tags: [Conduit, Linkerd, News] ---- - -Today we're very happy to announce [Conduit 0.5.0](https://github.com/runconduit/conduit/releases/tag/v0.5.0), which introduces _zero-config_ automatic TLS between mesh'd pods (including certificate creation and distribution). This means that most Kubernetes users can now encrypt internal HTTP communication between their services in just two simple commands. - -We're also happy to announce that 0.5.0 will be the last major release of Conduit. Conduit is [graduating into the Linkerd project](https://github.com/linkerd/linkerd/issues/2018) to become the basis of [Linkerd](http://linkerd.io) 2.0. Read on for what this means! - -## Conduit 0.5.0: TLS for free - -We've been working hard on Conduit 0.5.0 for the past few months. This release introduces several oft-requested features, including support for [HTTP protocol upgrades](https://developer.mozilla.org/en-US/docs/Web/HTTP/Protocol_upgrade_mechanism) (Conduit now supports WebSockets!) and HTTP CONNECT streams. Most importantly, it introduces a new feature that enables TLS between Conduit proxies, allowing them to automatically encrypt application traffic. - -This new automatic TLS support is a major step towards Conduit's goal of providing reliability and security to Kubernetes applications "for free". While it's gated behind an experimental flag in this release, we'll be working hard to de-experimentify it in the near future, as well as extend its scope and capabilities. You can read more about Conduit's TLS design and upcoming plans in the [Conduit Automatic TLS documentation](https://conduit.io/automatic-tls/). - -## Conduit is merging with Linkerd - -Conduit 0.5.0 will be the last major release of Conduit. We’re happy to announce that Conduit is graduating into the Linkerd project to become the basis of Linkerd 2.0. Over the next few weeks, you’ll start to see some changes in the Conduit and Linkerd projects in order to prepare for this change. - -Why merge Conduit into Linkerd? When we launched Conduit in December 2017, our hypothesis was that we could build a dramatically simpler solution to the problems that we've spent the last several years helping Linkerd users tackle: monitoring, reliability, and security of their cloud native applications. Furthermore, we were pretty sure that we could do this using only a small fraction of the system resources that Linkerd requires. But this was a risky move, and it didn't feel right to our many Linkerd production users to call it "Linkerd" until we were sure it would be successful.
- -Happily, after seven months of [iterating](https://blog.buoyant.io/2018/05/17/prometheus-the-right-way-lessons-learned-evolving-conduits-prometheus-integration/) on Conduit, it’s clear to us that Conduit is worthy of bearing the Linkerd name. Conduit's lightning-fast Rust proxies are ~10mb per instance, have sub-millisecond p99 latencies, and support HTTP/1.x, HTTP/2, and gRPC. Conduit can be installed in seconds on almost any Kubernetes cluster and applied incrementally to just the services you're interested in. [Conduit's telemetry support is best in class](https://blog.conduit.io/2018/04/20/conduit-0-4-0-wheres-my-traffic/) and, like TLS, comes "for free" without requiring any application changes. Most importantly, the [community around Conduit](https://github.com/runconduit/conduit/graphs/contributors) has dramatically ramped up over the past few months, with contributors, production users, and, of course, [lots of GitHub stars](http://www.timqian.com/star-history/#runconduit/conduit&linkerd/linkerd)! - -Over the coming weeks, [github.com/runconduit/conduit](https://github.com/runconduit/conduit) will be moved to [github.com/linkerd/linkerd2](https://github.com/linkerd/linkerd2); and the proxy component will be split into its own repo at [github.com/linkerd/linkerd2-proxy](https://github.com/linkerd/linkerd2-proxy). Once this change is made, we’ll stop publishing docker images to [gcr.io/runconduit](https://gcr.io/runconduit) and start publishing images to [gcr.io/linkerd-io](https://gcr.io/linkerd-io). After the merge, both Linkerd 1.x and 2.x lines will continue to be developed in parallel. Linkerd (both versions) will, of course, continue to be a CNCF member project. And we’ll be working hard on the next step: a Linkerd 2.0 GA release. - -There's a lot more we want to say about our plans for Linkerd 2.0, so please stay tuned. On behalf of both the Conduit and Linkerd maintainers, we’re incredibly excited about what this means for the future of Linkerd and the service mesh. Please drop into Linkerd [GitHub](https://github.com/linkerd/linkerd/issues/2018), [Slack](http://slack.linkerd.io) or [mailing list](https://groups.google.com/forum/#!forum/linkerd-users) with any feedback, questions, or concerns. This is a great time to get involved! diff --git a/linkerd.io/content/blog/conduit-ama-session-recap.md b/linkerd.io/content/blog/conduit-ama-session-recap.md deleted file mode 100644 index f315ca0bcf..0000000000 --- a/linkerd.io/content/blog/conduit-ama-session-recap.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -slug: 'conduit-ama-session-recap' -title: 'Conduit AMA session recap' -aliases: - - /2017/12/27/conduit-ama-session-recap/ -author: 'gmiranda23' -date: Wed, 27 Dec 2017 22:12:40 +0000 -draft: false -featured: false -thumbnail: /uploads/conduit_community_recap.png -tags: [Community, conduit, Conduit, News, Release Notes] ---- - -Conduit is now part of Linkerd! [Read more >]({{< relref -"conduit-0-5-and-the-future" >}}) - -Earlier this month we [announced Conduit](https://buoyant.io/2017/12/05/introducing-conduit/), the ultralight next-gen service mesh for Kubernetes. We’ve been blown away by the reception to Conduit and we got a chance to speak to many of you in-person at [KubeCon + CloudNativeCon](https://buoyant.io/2017/12/11/kubecon-cloudnativecon-north-america-2017-roundup/). To chat with those who couldn’t make it to Austin, we hosted an Ask Me Anything session via Slack with Buoyant co-founders William Morgan and Oliver Gould on Monday Dec 11. 
- -In case you missed it, we’re sharing a transcript here as well. The transcript has been edited for brevity and clarity, but otherwise contains the AMA questions & answers in their entirety. - ---- - -**How is Conduit different from Istio?** - -**William:** Conduit and Istio have basically the same goals (and so does Linkerd): provide reliability, security, flexibility, etc for a microservice app by managing the communication layer, adding timeouts, retries, circuit breaking, TLS, policy... all the stuff we know and love from Linkerd. But they come at it from different angles. For Conduit, we're really focused on providing the smallest possible solution that gets you there. Smallest = memory and CPU footprint, latency impact, etc, but also API surface area and the set of things you have to learn about. That’s a really important difference. - -We also have learned a lot from Linkerd over the past 18 months of operating it in prod, and a lot of those lessons are not obvious at all. But we're wrapping them up in Conduit. So, if we do our job right, Conduit will give you all the things you want out of the service mesh without imposing a big burden on either you or your horde of computers. - -**What’s in the Conduit 0.1 release today, and what can we expect next from what’s on the roadmap?** - -**Oliver:** For the 0.1 release, our primary target was to provide immediate visibility for gRPC services. This was mostly set as an engineering goal to make us solve the hard problems of transporting HTTP/2, which is quite a bit more complex than plain-old HTTP/1. In the upcoming releases, we’ll expand proxy support for HTTP/1, as well as arbitrary TCP streams. Furthermore, I expect we’ll be integrating with CA systems to provide TLS privacy out-of-the-box. - -**When will Conduit be production-ready?** - -**Oliver:** Conduit will be production-ready when it’s in production ;-D - -But we expect that will be in the early-next-year (2018) timeframe. We’re primarily focused on providing outstanding visibility, security by default, and the base operational features that make your system more resilient to consider Conduit production-ready. Much of the configuration surface area for Conduit, I expect, will be introduced after we can productionize a constrained set of features. - -**It looks like Conduit doesn’t currently ship with an Ingress controller. Is the idea that Conduit will have the proxy run as a k8s ingress controller? In the interim, do you have any guidance on running Conduit with community-provided k8s ingress controllers?** - -**Oliver:** This will become quite a bit easier once we hit our Proxy Transparency goals and Conduit is able to route arbitrary traffic. I expect that once that’s done, we’ll have a way to integrate well with k8s ingress resources; though over time, we’ll want something quite a bit better than them. But that should emerge out of Conduit’s routing functionality. - -**William:** Personally I think it would be cool to see if we can get something like [Contour](https://github.com/heptio/contour) and Conduit working together. - -**Oliver:** Conduor? - -**William:** TourDuit? - -**After I inject Conduit into my running deployments, what happens if the Conduit sidecar dies? Do I lose the entire deployment? How do I recover?** - -**William:** If it dies in one pod, it's tantamount to pod death. We have all sorts of mechanisms for handling that in K8s. But... it shouldn't die. - -**Oliver:** And we’d love a bug report in that case if it does.
- -**Are all of the engineers at Buoyant working on Conduit? Is it split into teams, some on Conduit and some on Linkerd? How are you supporting both?** - -**William:** We're continuing to invest heavily in Linkerd. It's the world's most widely deployed open source service mesh. And the world's only prod-ready open source service mesh. It's hard to speak for individual engineers, since these are both team efforts. But the same folks who are working on Linkerd are working on Conduit, roughly speaking, and it's important to me for everyone at Buoyant to share expertise across both. - -**Does that mean they're equal or is Buoyant prioritizing one over the other?** - -**William:** A good way of thinking about it is that we're spending our innovation points on Conduit. - -**Oliver:** We want Linkerd to be boring. Boring and stable. - -**William:** Conduit will be boring... in the future. - -**What would you say is the most amazing feature of Conduit?** - -**Oliver:** Tap. Definitely tap. Oh, or maybe the per-path stats. Or maybe that flow control is integrated deeply into our buffer management. - -**How can non-Buoyant community members get involved with the new project?** - -**Oliver:** The best way to get involved right now is [filing issues](https://github.com/runconduit/conduit/issues) and giving feedback! But over the next few weeks, we’ll be posting a lot more of our roadmap and good guidance for getting started will be up. - -**William:** Yeah. We already hit our first issue today in the Slack #conduit channel where someone wasn't able to get Conduit working due to (we suspect) RBAC. Which is probably totally right! And awesome. We should fix that. _Editor’s note: that was fixed in the_ [_Conduit 0.1.1 release_]({{< relref "announcing-conduit-0-1-1" >}}) _on Dec 20._ - -**What about from the contribution perspective? Does the choice of Rust limit the pool of potential contributors submitting PRs?** - -**William:** I'm actually hoping that the fact that the control plane is in Go will lower the barrier for contributions to Conduit. I think with Linkerd we suffer a bit from the fact that Scala (and not just regular Scala, but Finagle-ized Scala) is not trivial to ramp up on. - -**Oliver:** And Rust is also a non-trivial (though wildly fun) ramp to traverse. - -**William:** Rust has quite a steep learning curve as well, but the data plane is isolated from a lot of what people want to do, so I think overall we're probably in better shape for accepting contributions with Conduit than we are with Linkerd. Besides, that coveted First PR Accepted award is still out there, waiting for just the right person… _Editor’s note: we accepted the_ [_first community PR_](https://github.com/runconduit/conduit/pull/83) _on Dec 22. Thanks,_ [_FaKod_](https://github.com/FaKod)_!_
- -**Kevin Lingerfelt:** Also, just last week we [added hover states for the circles](https://github.com/runconduit/conduit/pull/19) in that column, to help explain what they mean. They’ll be in the [0.1.1 release](https://github.com/runconduit/conduit/releases/tag/v0.1.1). - -**William:** Like that ^. We're also not capturing when pods are in a Terminating state right now. When we add that, we can make the transition states for a deployment even more obvious. - -**As a non-Rust developer interested in getting involved, it's not clear how to get started or what the best dev cycle is. So a getting started doc might be helpful?** - -**William:** For sure. That's on our short-term todo list. And you may not actually have to learn Rust to contribute... in fact, I'm hoping that you _don't_ have to learn Rust to contribute except for a very particular set of features. - -**Oliver:** Yeah, totally. The controller APIs are still settling, but those will become the place to help build out better features. - -**What should we expect to see around the release pace for Conduit? Linkerd has been getting released about every 2 weeks. Same for Conduit? More? Less?** - -**William:** I don't know if we'll stick to exactly 2 weeks, though I like that pace. We definitely have some aggressive goals around getting ready for prod usage as rapidly as possible with a minimal feature set, and like Oliver said above, we're aiming to do that by early next year. One of the other things that makes our lives a bit easier vs Linkerd is that the Conduit control plane will be configurable via gRPC plugins. This means a) we can ship with a minimal feature set since everything will be very customizable; and b) that user plugins don't have to run in the data plane. - ---- - -[Try Conduit](https://conduit.io/getting-started/) today. Hopefully the transcript answers some of the questions you've had about Conduit. If it doesn't, pop into the #conduit channel on the [Linkerd Slack group](http://linkerd.slack.com) to chat with us directly. Open issues or submit PRs [directly via GitHub](https://github.com/runconduit/conduit). And if you want to work with us, [we're hiring](https://buoyant.io/careers/)! diff --git a/linkerd.io/content/blog/consolidated-kubernetes-service-mesh-linkerd-config.md b/linkerd.io/content/blog/consolidated-kubernetes-service-mesh-linkerd-config.md deleted file mode 100644 index 094bf105f9..0000000000 --- a/linkerd.io/content/blog/consolidated-kubernetes-service-mesh-linkerd-config.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -slug: 'consolidated-kubernetes-service-mesh-linkerd-config' -title: 'The Consolidated Kubernetes Service Mesh Linkerd Config' -aliases: - - /2017/08/04/consolidated-kubernetes-service-mesh-linkerd-config/ -author: 'eliza' -date: Fri, 04 Aug 2017 22:20:42 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_featured.png -tags: [Linkerd, linkerd, News, tutorials] ---- - -## A Service Mesh for Kubernetes - -Since [October 2016]({{< ref -"a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}), we’ve provided the popular “A Service Mesh for Kubernetes” series of blog posts, highlighting major features of Linkerd and providing working examples of Linkerd and Kubernetes configurations to make use of them. These posts are a useful way to explore various Linkerd features and use-cases in depth.
- -Now we’ve compressed the configuration in the series into a single, consolidated config to act as a canonical starting point for adding the Linkerd service mesh to a Kubernetes cluster. In this post, we’ll talk you through the details of this configuration file. If you just want to get started with it, you can download it here: [Linkerd Kubernetes Service Mesh config file](https://github.com/linkerd/linkerd-examples/blob/master/k8s-daemonset/k8s/servicemesh.yml). - -## A Kubernetes for the Service Mesh - -Many Linkerd users have started with the config files in the “A Service Mesh for Kubernetes” series. This is great, and we’d like to encourage the continued use of these blog posts as an educational resource. However, the configs in these blog posts are intended to demonstrate specific Linkerd features in a self-contained manner. Assembling a fully-featured, production-ready configuration requires stitching together several different configs, and this can be difficult! - -The consolidated [Linkerd Kubernetes config](https://github.com/linkerd/linkerd-examples/blob/master/k8s-daemonset/k8s/servicemesh.yml) merges the configurations across all these posts and provides a complete configuration to deploy a service mesh of Linkerd instances onto your cluster as a Kubernetes [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/). This configuration provides support for HTTP, [HTTP/2](https://buoyant.io/2017/01/10/http2-grpc-and-linkerd/), and [gRPC](https://buoyant.io/2017/04/19/a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit/) calls, as well as  [load balancing](https://buoyant.io/2016/03/16/beyond-round-robin-load-balancing-for-latency/), [circuit breaking](https://buoyant.io/2017/01/13/making-microservices-more-resilient-with-circuit-breaking/), [dynamic routing](https://buoyant.io/2016/11/04/a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting/), and [ingress](https://buoyant.io/2017/04/06/a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller/) and [egress control](https://buoyant.io/2017/06/20/a-service-mesh-for-kubernetes-part-xi-egress/). - -Once deployed, HTTP applications can use Linkerd by setting the `http_proxy` environment variable to  `$(NODE_NAME):4140`, where `NODE_NAME` is the name of the Kubernetes node where the application instance is running. Ingress traffic sent to port 80 (or port 8080 for HTTP/2) on the ingress address of the cluster will be routed according to the Kubernetes Ingress resource, and any egress requests to names that do not correspond to Kubernetes services (e.g. “buoyantiodev.wpengine.com”) will fall back to a DNS lookup and be proxied outside the cluster. - -## Deploying the Service Mesh - -To deploy the service mesh onto your cluster, simply run the following commands: - -```bash -kubectl create ns linkerd -kubectl apply -f https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/servicemesh.yml -``` - -To verify that Linkerd is running, run: - -```bash -kubectl -n linkerd port-forward $(kubectl -n linkerd get pod -l app=l5d -o jsonpath='{.items[0].metadata.name}') 9990 & -``` - -And then open [http://localhost:9990](http://localhost:9990) in your web browser – you should see the Linkerd administration dashboard. - -More instructions on configuring your application to work with the official Kubernetes config can be found in [the Linkerd documentation](https://linkerd.io/getting-started/k8s/). 
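As a quick illustration of the `http_proxy` approach described above, the sketch below (with a hypothetical `hello` service) shows what an application container does once the DaemonSet is deployed. In a real pod spec, `NODE_NAME` would typically be injected via the Kubernetes Downward API (`fieldRef: spec.nodeName`), and the env value would be written as `$(NODE_NAME):4140`:

```bash
# Run inside an application container on the mesh; NODE_NAME is assumed to be
# injected into the environment via the Downward API.
export http_proxy="${NODE_NAME}:4140"

# Outbound HTTP requests now flow through the node-local Linkerd instance,
# picking up its load balancing, metrics, and routing along the way.
curl http://hello
```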
- -## Conclusion - -With this config, it’s easier than ever to get started running Linkerd on Kubernetes! Whether you’re running Kubernetes on a massive production cluster or in [Minikube](https://github.com/kubernetes/minikube) on your laptop; whether you’re already using Linkerd to route critical production traffic or just checking it out for the first time, this config will allow you to easily set up a fully-featured Linkerd service mesh, and serve as a starting point to write your own custom configurations to best suit the needs of your application. - -For more information about [Linkerd’s various features](https://linkerd.io/features/index.html) on Kubernetes, see our [Service Mesh For Kubernetes]({{< ref -"a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) blog series. As always, if you have any questions or just want to chat about Linkerd, join [the Linkerd Slack](http://slack.linkerd.io/) or browse the [Linkerd Support Forum](https://linkerd.buoyant.io/) for more in-depth discussion. diff --git a/linkerd.io/content/blog/debugging-production-issues-with-linkerds-diagnostic-tracing.md b/linkerd.io/content/blog/debugging-production-issues-with-linkerds-diagnostic-tracing.md deleted file mode 100644 index 6347fdb6af..0000000000 --- a/linkerd.io/content/blog/debugging-production-issues-with-linkerds-diagnostic-tracing.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -slug: 'debugging-production-issues-with-linkerds-diagnostic-tracing' -title: "Debugging production issues with Linkerd's diagnostic tracing" -aliases: - - /2018/06/19/debugging-production-issues-with-linkerds-diagnostic-tracing/ -author: 'dennis' -date: Tue, 19 Jun 2018 23:25:51 +0000 -draft: false -featured: false -thumbnail: /uploads/DiagnosticTracing_Linkerd.png -tags: - [ - debug, - debugging, - Linkerd, - linkerd, - microservices, - News, - tracing, - Tutorials & How-To's, - ] ---- - -[Linkerd 1.4.2](https://github.com/linkerd/linkerd/releases/tag/1.4.2) introduces a new _diagnostic tracing_ feature that allows you to send test requests through the system in order to see how Linkerd routed that request at each hop along the way. Diagnostic tracing allows you to quickly solve common Linkerd troubleshooting scenarios without affecting the state of your production services. - -Of the lessons we’ve learned over the last two years from helping companies around the world put Linkerd into production, the one that stands out above the rest is the importance of having solid _runtime diagnostics_. We commonly find that the exact configuration and deployment scheme that worked in staging and dev can exhibit unexpected behavior in production. And just as commonly as they occur, reproducing those anomalies outside of the production environment context can be incredibly difficult. - -In part, that’s a fundamental law of software---new problems _always_ crop up in prod (to paraphrase Mike Tyson, “everyone has a plan until they deploy to production”). This is particularly troublesome for Linkerd because of its position in the stack as an integration layer. By sitting at the intersection of the layer 3 network, service discovery, DNS, application behavior, and many other distributed system components, any latent problem in the interaction between these systems can seem to manifest itself as unexpected behavior in Linkerd itself. - -When Linkerd first gets introduced into new infrastructure, it naturally becomes the first thing that takes the blame whenever anything goes wrong. 
Our first inclination is to conclude that failures are happening in the new component. Obviously, it’s that new thing we just put in because everything now routes through it! Therefore, introspective diagnostics are vital to determine what’s actually at fault and showing a path to resolution. (For more about the blame phenomenon, see [How to Put a Service Mesh into Production Without Getting Fired](https://www.youtube.com/watch?v=XA1aGpYzpYg)) - -To improve this, we’ve been hard at work over the past few months adding runtime diagnostics to Linkerd to allow operators to rapidly diagnose issues in production. Some examples of these diagnostics include the ability to [introspect current Kubernetes and Namerd watch states](https://github.com/linkerd/linkerd/releases/tag/1.4.1). Additionally, we’re happy to now introduce the new diagnostic tracing feature. - -## An introduction to diagnostic tracing - -Diagnostic tracing is a feature [originally requested by Oliver Beattie](https://github.com/linkerd/linkerd/issues/1732), Head of Engineering at Monzo, during a bout of particularly complex Linkerd production debugging. It was such a great idea that we’ve built it right into Linkerd. Mirroring that troubleshooting scenario and others within the community, we’ve identified several situations where diagnostic tracing could be useful. We found that: - -- In situations where other verification mechanisms do not exist, it can be hard to tell if a request was routed correctly to a destination service. -- If multiple Linkerd instances are involved in request routing (linker-to-linker configuration) it can be difficult to identify which exact Linkerd instance failed to route a request. -- The dtab playground UI presumes you already know how a request is identified. It also does not use Linkerd’s actual internal state to identify and route requests. Rather, the UI spins up new service lookup requests on service names. This can cause discrepancies between how the request is actually routed and how the UI thinks Linkerd will route the request. -- It can be difficult to differentiate whether routing failures originated from a failed service or from a failure within Linkerd. - -With diagnostic tracing, you can send a test request to a service in production, without affecting its production state, to determine what’s happening in these situations. Each Linkerd instance that proxies the test request gathers information about how it’s configured to route requests to its intended service and adds that information to the response. The result is a detailed list of all Linkerd instances that participate in the request path to a service --- effectively creating a "breadcrumb" trail to that service. - -In cases where you want to verify that requests are routed correctly, diagnostic tracing gives you the IP addresses of the destination services where the test request is sent. Diagnostic tracing also gives you step by step name resolution for service names, allowing you to observe how Linkerd resolves these names without having to go through the admin UI.  Finally, because each Linkerd appends its routing context to the response body of test requests, diagnostic tracing shows the role each Linkerd instance plays in the request path and helps you identify which instance may have failed to route a request. - -## Diagnostic tracing in practice - -Let's walk through a scenario to see how diagnostic tracing can help us troubleshoot service failures. 
In our scenario, Linkerd is set up in a [linker-to-linker](https://github.com/linkerd/linkerd-examples/blob/b5689b517108c2a79138e34d8357787580106e76/k8s-daemonset/k8s/servicemesh.yml) configuration with some downstream service receiving traffic. - -When we send a request to a service (in our example we use a linker-to-linker setup pointing to a hello service) through Linkerd, we see this response: - - -```bash -$ curl http://localhost:4140/hello - -Invalid response 500 -``` - - -The response doesn't really help us troubleshoot where the problem may be coming from. Is this error message being sent from Linkerd? If so, which Linkerd is it coming from in our linker-to-linker configuration? Is this a message from the hello service itself? It's difficult to tell. - -Instead, running a diagnostic trace generates much more useful information. To send a diagnostic trace, set the HTTP method to TRACE and add the “l5d-add-context: true” header. - -For example, sending a diagnostic test request using curl would look like this: - - -```bash -$ curl -X TRACE -H "l5d-add-context: true" http://localhost:4140/ -``` - - -Using this command with our hello service, we see this response: - - -```bash -$ curl -X TRACE -H "l5d-add-context: true" http://localhost:4140/hello - -invalid response 500 - ---- Router: incoming --- -request duration: 22 ms -service name: /svc/hello -client name: /%/io.l5d.localhost/#/io.l5d.fs/hello -addresses: [127.0.0.1:7777] -selected address: 127.0.0.1:7777 -dtab resolution: - /svc/hello - /#/io.l5d.fs/hello (/svc=>/#/io.l5d.fs) - /%/io.l5d.localhost/#/io.l5d.fs/hello (SubnetLocalTransformer) - ---- Router: outgoing --- -request duration: 32 ms -service name: /svc/hello -client name: /%/io.l5d.port/4141/#/io.l5d.fs/hello -addresses: [127.0.0.1:4141] -selected address: 127.0.0.1:4141 -dtab resolution: - /svc/hello - /#/io.l5d.fs/hello (/svc=>/#/io.l5d.fs) - /%/io.l5d.port/4141/#/io.l5d.fs/hello (DelegatingNameTreeTransformer$) -``` - - -The diagnostic trace request gives us much more information to work with! From the response, we can see that the request first hits the "outgoing" Linkerd router, or the first linker in the linker-to-linker configuration. Then, the request is forwarded to the "incoming" Linkerd router (the second linker). The request is then forwarded to the hello service at `127.0.0.1:7777`, and that is where we see the origin of the `invalid response 500`. With diagnostic tracing, we can deduce the **request duration** between each Linkerd hop, the **service name** used to identify the recipient of the TRACE request, the load balancer's set of **IP addresses** that point to the service, the **selected address** used to forward the test request, and the **dtab resolution**, or the steps Linkerd takes to resolve a service name to a client name. With this information, we can confirm that the hello service generates the error rather than Linkerd and that Linkerd is indeed routing the request correctly. Pretty neat! - -## How it works - -Linkerd checks for TRACE requests that have an `l5d-add-context` header set to true and only adds its routing context in the presence of this header. TRACE requests aren't typically used in a production environment, so it is generally safe to forward requests to production services. Furthermore, HTTP TRACE, according to [RFC 7231, section 4.3.8](https://tools.ietf.org/html/rfc7231#section-4.3.8), is intended to be used by clients to test or diagnose how servers downstream react to incoming requests.
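To make the header's role concrete, here is a minimal sketch reusing the hello service from the walkthrough above (the exact output will of course depend on your own setup):

```bash
# Without the header, Linkerd simply forwards the TRACE request and the
# response carries no routing context.
curl -X TRACE http://localhost:4140/hello

# With the header, each Linkerd hop that proxies the request appends its
# routing context (router, service name, client name, addresses, dtab
# resolution) to the response body, as in the example output above.
curl -X TRACE -H "l5d-add-context: true" http://localhost:4140/hello
```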
During diagnostic tracing, other Linkerd instances may encounter a diagnostic trace request. When that happens, that Linkerd instance forwards the request until it reaches some service that responds with an HTTP status code and possibly a response body. By the time the initial client receives the response, each Linkerd that forwards the request has appended its routing context to the response. The client then gets a detailed list of all services and Linkerd instances along the request path. - -## Distributed tracing vs. Diagnostic tracing - -Despite its name, diagnostic tracing is not a substitute for “standard” distributed tracing like Zipkin and OpenTracing. Rather, it is an additional tool to help you troubleshoot your applications. “Standard” distributed tracing differs from diagnostic tracing in several ways: - -- Standard distributed tracing observes actual production traffic, while diagnostic tracing sends dedicated test requests. -- Standard distributed tracing gives you a complete picture of your application if you have configured services to forward trace headers, while diagnostic tracing requests are reflected by the first server that responds to the request. -- Standard distributed tracing typically requires you to view traces through some side-channel like the Zipkin UI, while diagnostic tracing can show you the results of a diagnostic test directly in the response body. - -Standard distributed tracing can answer questions like “what are the ten slowest requests to occur over the past hour, and where were they slow?”, but it often requires a more involved setup process to get this kind of information. While distributed tracing is a powerful debugging tool, diagnostic tracing is quick and easy to use. It gives you concise information about whether you can or cannot route a request to a service and what steps are taken to route a request. - -We’re excited to see how the Linkerd community uses this feature and we hope it will be useful in diagnosing the complex and hard-to-reproduce issues that can come up in production deployments! We’d love to hear your thoughts -- if you use Linkerd’s diagnostic tracing to debug a problem, please tell us about it by joining the [Linkerd Slack group](https://linkerd.slack.com/). diff --git a/linkerd.io/content/blog/distributed-tracing-for-polyglot-microservices.md b/linkerd.io/content/blog/distributed-tracing-for-polyglot-microservices.md deleted file mode 100644 index ec722a372c..0000000000 --- a/linkerd.io/content/blog/distributed-tracing-for-polyglot-microservices.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -slug: 'distributed-tracing-for-polyglot-microservices' -title: 'Distributed Tracing for Polyglot Microservices' -aliases: - - /2016/05/17/distributed-tracing-for-polyglot-microservices/ -author: 'kevin' -date: Tue, 17 May 2016 22:28:48 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_featured_ployglot-1.png -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -Distributed tracing is a critical tool for debugging and understanding microservices. But setting up tracing libraries across all services can be costly—especially in systems composed of services written in disparate languages and frameworks. - -In this post, we’ll show you how you can easily add distributed tracing to your polyglot system by combining [Linkerd](https://linkerd.io/), our open source service mesh for cloud-native applications, with [Zipkin](http://zipkin.io/), a popular open source distributed tracing framework.
Why distributed tracing? As companies move from monolithic to multi-service architectures, existing techniques for debugging and profiling begin to break down. Previously, troubleshooting could be accomplished by isolating a single instance of the monolith and reproducing the problem. With microservices, this approach is no longer feasible, because no single service provides a complete picture of the performance or correctness of the application as a whole. We need new tools to help us manage the [real complexity of operating distributed systems at scale]({{< relref "real-world-microservices-when-services-stop-playing-well-and-start-getting-real" >}}). - -Distributed tracing is one such tool. With distributed tracing, we can track requests as they pass through multiple services, emitting timing and other metadata throughout, and this information can then be reassembled to provide a complete picture of the application’s behavior at runtime. Distributed tracing has been called [“the most wanted and missed tool in the microservice world”](https://medium.com/@denis.zaytsev/distributed-tracing-the-most-wanted-and-missed-tool-in-the-micro-service-world-c2f3d7549c47). - -One popular implementation of distributed tracing is [Zipkin](http://zipkin.io/), an open-source project that provides tools for both collecting and analyzing trace data. Applications integrate with Zipkin by reporting request timing data to a Zipkin collector process. This data is stored to a persistent backend and aggregated into traces, which can be queried through Zipkin’s query interface or displayed in its web UI. - -Here’s an example trace in the Zipkin UI: - -{{< fig - alt="zipkin ui" - title="zipkin ui" - src="/uploads/2017/07/buoyant-zipkin-trace-overview.png" >}} - -As you can see, Zipkin traces afford us a comprehensive view of runtime application behavior, allowing us to answer questions such as: - -- Which parts of my system are slow? -- Which call patterns can be optimized with parallelization? -- Which calls are redundant? - -Zipkin has been used at scale, and boasts tracing libraries for [many languages and runtimes](https://zipkin.io/pages/tracers_instrumentation.html). However, there are cases where adding a library to an existing service can be costly, or where maintaining feature and performance parity across a wide set of languages can be restrictive. - -Enter [Linkerd](https://linkerd.io/), our open source, industrial-strength *service mesh* for cloud-native apps. There are [many reasons to use Linkerd]({{< -relref "linkerd-twitter-style-operability-for-microservices" >}}), but in this case, we can take advantage of the fact that, since Linkerd is handling your service-to-service HTTP or RPC calls for you, it’s also in the perfect position to automatically add instrumentation and emit tracing data on your behalf—*without* requiring Zipkin libraries per service. - -In fact, since Linkerd is built on top of [Finagle](http://twitter.github.io/finagle/guide/), Twitter’s open source, high-volume RPC library, Linkerd features Zipkin support that has actually been tested at scale! - -In the rest of this article, we’ll walk you through the process of generating Zipkin traces from Linkerd. - -## SETUP - -For the purpose of this article, we’re going to run a suite of Zipkin services locally, side-by-side with a Linkerd process. This setup works for demonstration purposes; in production, it’s much more likely that you’ll have hundreds of Linkerd processes all writing timing data to a centralized Zipkin cluster.
- -The easiest way to run Zipkin locally is to use the [docker-zipkin](https://github.com/openzipkin/docker-zipkin) project, which also requires that you have [docker-compose](https://docs.docker.com/compose/overview/) installed. Clone the repo, and start Zipkin: - -```bash -git clone https://github.com/openzipkin/docker-zipkin.git -cd docker-zipkin -docker-compose up -``` - -Next, let’s create an example `linkerd.yaml` config file that includes the Zipkin tracer config. For instance: - -```yml -admin: - port: 9990 - -telemetry: - - kind: io.l5d.zipkin - host: - port: 9410 - sampleRate: 1.0 - -routers: - - protocol: http - label: router1 - dtab: /svc => /$/inet/127.1/9001 - servers: - - port: 9000 - - protocol: http - label: router2 - dtab: /svc => /$/inet/127.1/9002 - servers: - - port: 9001 - - protocol: http - label: router3 - dtab: /svc => /$/inet/127.1/9990 - servers: - - port: 9002 -``` - -To send data to Zipkin, we use the `io.l5d.zipkin` telemeter. Found in the `telemetry` section of the config above, this tells the Linkerd process that it should generate and emit tracing data. The `host` and `port` config options indicate where the zipkin-collector process is running (be sure to set the host to match the docker IP where you started docker-zipkin in the previous step). The `sampleRate` config option determines what percentage of requests will be traced. Typically that number would be set much lower, but for the purpose of demonstration we are tracing 100% of requests. - -It’s also worth pointing out that the `routers` section of the config file above is contrived in order to make our example requests somewhat more interesting. It defines three routers listening on three different ports. The router on port 9000 forwards to the router on port 9001, which forwards to the router on port 9002, which forwards to the Linkerd admin service on port 9990. There is absolutely no good reason for you to configure Linkerd this way in production, but we use it now to simulate three service hops and demonstrate tracing capabilities. For more information on configuring Linkerd, check out our [Configuration Guide](https://linkerd.io/configuration/). - -Once you’ve created the config, you can run it using the Linkerd executable that’s available for download from the [Linkerd GitHub releases](https://github.com/linkerd/linkerd/releases) page. For this example, we’ll use the executable from the most recent release: [`linkerd-0.9.0-exec`](https://github.com/linkerd/linkerd/releases/download/0.9.0/linkerd-0.9.0-exec). Start Linkerd with the config file that you created: - -```bash -$ ./linkerd-0.9.0-exec linkerd.yaml -... -I 0511 18:14:47.496 THREAD1: Serving admin http on 9990 -I 0511 18:14:47.613 THREAD1: serving router1 on localhost/127.0.0.1:9000 -I 0511 18:14:47.627 THREAD1: serving router2 on localhost/127.0.0.1:9001 -I 0511 18:14:47.632 THREAD1: serving router3 on localhost/127.0.0.1:9002 -``` - -## TRACING - -Now that Linkerd and Zipkin are up and running, let’s use the configured routers to send a request to Linkerd’s admin/ping endpoint: - -```bash -$ time curl :9000/admin/ping -pong -real 0m0.028s -user 0m0.004s -sys 0m0.004s -``` - -Huh. 28 milliseconds to receive a 4-byte response. That seems suspicious. Can we get some additional information about how the latency for that request breaks down? Sure we can. Let’s visit the Zipkin web UI, which is running on port 9411 in your docker machine. When the UI loads, select `127.0.0.1/9000` from the service dropdown menu, and click `Find Traces`.
Select a trace, and then click `Expand All`. You’ll see something like: - -{{< fig - alt="zipkin trace" - title="zipkin trace" - src="/uploads/2017/07/buoyant-zipkin-trace.png" >}} - -This trace shows multiple spans. The top-most span shows `router1` receiving the initial request. This span represents the point at which the request entered our system, from which it took us 18 milliseconds to respond (the other 10 milliseconds can be attributed to curl establishing its own HTTP connection). The next span shows `router1` making a request to `router2`, the third span shows `router2` receiving the request from `router1`, and so on. The very last span shows the actual request from `router3` to the admin interface. The length of the bars shows that each subsequent network request adds a few milliseconds of latency. The actual admin endpoint that we asked for only took 3 milliseconds to respond. - -If you click on the last span in the trace, you can see details about that span, which look something like: - -{{< fig - alt="zipkin detail" - title="zipkin detail" - src="/uploads/2017/07/buoyant-zipkin-detail.png" >}} - -This shows information like the response status code (200) and the response body content length (4 bytes) and content type (text). It also shows specific timing info for the request. The Client Send annotation is the time at which the router instructed its finagle-http client to send the request, whereas the Wire Send annotation represents the time at which the request was actually sent. Wire Receive and Client Receive have similar semantics. In this case, the observed amount of time that it took for the admin server to respond to our request is only 2 milliseconds. This is a good demonstration of how latency is incurred by introducing multiple hops into the system, even in the best-case scenario. You can imagine what happens in large systems where network queueing, garbage collection, and resource contention introduce further sources of delay. Hence the value of distributed tracing! - -It’s especially important to note that the admin server is not instrumented to emit timing data to Zipkin. Simply by using the Linkerd service mesh to route our requests, we’re able to gain valuable insights into how our services are performing without adding additional instrumentation to the services themselves. - -## REQUEST CONTEXT - -While we at Buoyant like to describe all of the additional tracing data that Linkerd provides as “magic telemetry sprinkles for microservices”, the reality is that we need a small amount of request context to wire the traces together. That request context is established when Linkerd receives a request, and, for HTTP requests, it is passed via HTTP headers when Linkerd proxies the request to your application. In order for your application to preserve request context, it needs to include, without modification, all of the inbound `l5d-ctx-*` HTTP headers on any outbound requests that it makes. As an example, check out this [Ruby plugin](https://gist.github.com/klingerf/6365bec92a24e6f6a77e78ecb3a7220a) that handles all header forwarding for a web service built with Sinatra and ActiveResource. - -Forwarding request context for Linkerd comes with far more benefits than just tracing, too. For instance, adding the `l5d-dtab` header to an inbound request will add a dtab override to the request context. 
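A minimal sketch of what that looks like from the command line follows; the service names and the dtab rule here are hypothetical, and the port assumes the HTTP router on 9000 from the config above:

```bash
# Hypothetical example: override routing for this one request only, sending it
# to a staging copy of a service. The names are made up; the key point is that
# the l5d-dtab header adds a dtab override to this request's context, and (if
# the context headers are propagated) to its downstream requests as well.
curl -H "l5d-dtab: /svc/hello => /svc/hello-staging" http://localhost:9000/hello
```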
Provided you propagate request context, dtab overrides can be used to apply [per-request routing overrides](https://linkerd.io/features/routing/#per-request-routing) at any point in your stack, which is especially useful for staging ad-hoc services within the context of a production application. In the future, request context will be used to propagate overall *latency budgets*, which will make handling requests within distributed systems much more performant. - -Finally, the `L5d-sample` header can be used to adjust the tracing sample rate on a per-request basis. To guarantee that a request will be traced, set `L5d-sample: 1.0`. If you’re sending a barrage of requests in a loadtest that you don’t want flooding your tracing system, consider setting it to something much lower than the steady-state sample rate defined in your Linkerd config. - -## TRYING IT OUT YOURSELF - -We’ve described how to use Linkerd to automatically generate Zipkin traces from service-to-service HTTP or RPC calls without having to add Zipkin libraries into application code. By using the techniques above, applications that use Linkerd can get Zipkin traces with minimal effort. If you’re interested in trying this out, we’re always happy to help folks use Linkerd in new and exciting ways. Check out Linkerd’s [tracing documentation](https://linkerd.io/config/0.9.0/linkerd/index.html#telemetry), and hop into the [Linkerd Slack](http://slack.linkerd.io/) and say hello. diff --git a/linkerd.io/content/blog/http2-grpc-and-linkerd.md b/linkerd.io/content/blog/http2-grpc-and-linkerd.md deleted file mode 100644 index 0eaf57be25..0000000000 --- a/linkerd.io/content/blog/http2-grpc-and-linkerd.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -slug: 'http2-grpc-and-linkerd' -title: 'HTTP/2, gRPC and Linkerd' -author: 'oliver' -date: Wed, 11 Jan 2017 00:16:09 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_GRPC_featured.png -tags: [linkerd, News, Product Announcement] ---- - -In March 2016 at Kubecon EU, I gave my [first public talk on Linkerd](https://www.youtube.com/watch?v=co7JRxihcdA). At the end of this talk, like most of the other 20+ talks I gave in 2016, I presented a high-level Linkerd [roadmap](https://speakerdeck.com/olix0r/kubernetes-meets-finagle-for-resilient-microservices?slide=34) that aspirationally included HTTP/2 & gRPC integration. As we enter 2017, I’m pleased to say that we’ve reached this initial goal. Let me take this opportunity to summarize what I think is novel about these technologies and how they relate to the future of Linkerd service meshes. - -## WHAT’S THE BIG DEAL? - -### H2: WHY A NEW PROTOCOL? - -The HTTP protocol and its simple enveloped-request-and-response model underlie all communication on the Web. While innumerable schemes have been devised on top of HTTP, the fundamental protocol has changed very little since its inception. - -After HTTP/1.0 was standardized in May 1996, implementers soon realized that it was impractically wasteful and s…l…o…w to establish a new connection for each request; and so the HTTP/1.1 protocol revision was standardized in January 1997. The revised protocol allowed browsers to reuse a connection to service multiple requests. - -This simple communication scheme withstood an additional 15 years of usage with basically no change. However, in 2012, a group of Large Website operations and browser performance experts started to confront the limitations of HTTP/1.1. This resulted in an overhaul of the HTTP protocol: HTTP/2 (or just *h2*).
- -Unfortunately, the enhancements introduced in HTTP/1.1 were not enough to meet the performance requirements of modern applications: - -1. In HTTP/1.1, requests are processed *sequentially*. This means that a single slow request may add latency to unrelated requests. -2. HTTP/1.1 has rudimentary support for streaming message bodies via the *chunked* transfer encoding, but streams consume an entire connection, and connections ain’t free. -3. And when the receiver of a stream wants to inform the producer that it doesn’t care to receive data, its only options are to stop calling `read(2)` to exert back-pressure, or to `close(2)` and re-establish the entire connection before sending additional requests (which, as I’ve said, is costly). - -Need convincing? Look at this trivial example that compares how [HTTP/1.1](http://http2.golang.org/gophertiles?latency=1000) and [HTTP/2](https://http2.golang.org/gophertiles?latency=1000) behave when communicating with a slow endpoint. - -HTTP/2’s primary innovation is that it explicitly separates the (Layer-4) concerns of *connection management* from the (Layer-5) concerns of transmitting HTTP messages. Messages can be multiplexed, reordered, and canceled, eliminating bottlenecks and generally improving performance and reliability of the application at large. - -### GRPC - -Virtually all modern programming environments include a small arsenal for communicating over HTTP; and yet, it’s still far from trivial to begin programming against a new HTTP API in an arbitrary language. HTTP is simply a session protocol and it does virtually nothing to prescribe how applications are written to use it. - -Enter the IDL. *Interface Definition Languages* allow service owners to specify their APIs independently of any given programming language or implementation. Interface definitions are typically used to *generate* communication code for a variety of programming environments, freeing programmers from the subtleties of encoding and transmission. While there are a [multitude of IDLs](https://en.wikipedia.org/wiki/Interface_description_language), [Google Protocol Buffers](https://developers.google.com/protocol-buffers/) (or *protobuf*) and [Apache Thrift](https://thrift.apache.org/) were championed by early microservice adopters like Google, Facebook, & Twitter. More recently, the microservice movement, propelled by technologies like Docker, Kubernetes, and Mesos, has amplified the need for tools that simplify service-to-service communication. - -In early 2015, [Google announced gRPC](https://developers.googleblog.com/2015/02/introducing-grpc-new-open-source-http2.html), a “universal RPC framework” which combines the ubiquitous foundation of HTTP with the performance gains of HTTP/2 and the portable interfaces of protobuf. It is novel in that: - -- it transmits metadata through HTTP envelopes, allowing orthogonal features to be layered in (like authentication, distributed tracing, deadlines, and routing proxies ;), -- it provides operational affordances like multiplexed streaming, back-pressure, & cancellation, -- it will be available everywhere HTTP/2 is (like web browsers), -- and it abstracts the details of communication from application code (unlike REST). - -gRPC takes a modular approach in the way that it layers the best features of IDL-specified RPC onto a standard, performant protocol, hopefully finally disabusing people of the notion that HTTP and RPC are different models.
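One way to see the "gRPC is just HTTP/2" point concretely: a gRPC call is an ordinary HTTP/2 POST to a `/package.Service/Method` path with a gRPC content-type, which is exactly why an HTTP/2-aware router can route it without understanding the protobuf payload. A rough sketch is below; the host, port, and service name are placeholders, and a real call needs a length-prefixed protobuf body, so use an actual gRPC client in practice:

```bash
# A gRPC request on the wire, approximately: an HTTP/2 POST with a grpc
# content-type. The address and method path are placeholders; this won't
# complete a real RPC without a properly framed protobuf body.
curl --http2-prior-knowledge -X POST \
  -H "content-type: application/grpc" \
  http://localhost:4142/helloworld.Greeter/SayHello
```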
- -## H2, GRPC, AND LINKERD - -While Linkerd’s protocols are pluggable, support was initially limited to HTTP/1.1 and Thrift service meshes. For all of the reasons outlined above, we believe that gRPC is the future; so we set our sights on extending Linkerd to serve as a router for gRPC. - -In March 2016, we began to [assess](https://github.com/linkerd/linkerd/issues/174) what this would take. We knew that Netty had a fairly stable, well-tested HTTP/2 implementation, and, by early May, Finagle had undergone the necessary refactoring to support HTTP/2 integration. It’s around this time that [Moses Nakamura](https://github.com/mosesn) started working on the [finagle-http2 subproject](https://github.com/twitter/finagle/tree/develop/finagle-http2). I spent some time testing this for our needs, but ultimately I decided that finagle-http2 was not an expedient path to gRPC support in Linkerd: it intended to sacrifice feature completeness for API compatibility (a totally reasonable tradeoff to make for the finagle project, but insufficient for our needs in Linkerd). - -After several months writing, testing, and rewriting h2 support in Linkerd, the [0.8.2 release](https://github.com/linkerd/linkerd/releases/tag/0.8.2) introduced the ability to route HTTP/2 and gRPC messages. With help from Linkerd users around the world, each Linkerd release is improving the stability and performance of our h2 codec. There’s still [more work to do](https://github.com/linkerd/linkerd/issues?q=is%3Aissue+is%3Aopen+label%3Ah2), but we’re well on our way. - -To that end, I’m also happy to announce that Linkerd’s [0.8.5 release](https://github.com/linkerd/linkerd/releases/tag/0.8.5) introduces support for [gRPC code generation in the Linkerd project](https://github.com/linkerd/linkerd/tree/master/grpc). This will make it possible to, for instance, consume gRPC APIs from Linkerd plugins. Furthermore, we plan on introducing a [gRPC API for namerd](https://github.com/linkerd/linkerd/issues/842) so that you can write namerd clients in any language. - -### GOALS FOR 2017 - -We’ll be investing heavily in the HTTP/2 & gRPC ecosystem in Linkerd in 2017: - -#### Graduate from `experimental` - -Currently, the *h2* Linkerd router protocol is marked as *experimental*. Once we’ve completed a broader set of compatibility tests, and ideally once there are a few other serious users of Linkerd’s h2 protocol, we’ll [remove the `experimental` flag](https://github.com/linkerd/linkerd/issues/854). - -#### Support HTTP/1.1->HTTP/2 upgrade - -Currently, a Linkerd router may be configured to accept *either* HTTP/1 *or* HTTP/2 messages. However, Linkerd should be able to [upgrade HTTP/1 messages to HTTP/2](https://github.com/linkerd/linkerd/issues/841), without the application’s participation. This will allow Linkerd to be much more efficient in terms of how it manages inter-node connections. - -#### gRPC control plane - -After months of use, we’re eager to [replace Namerd’s Thrift API with gRPC](https://github.com/linkerd/linkerd/issues/842). If we’re happy with this, I’d like to create additional gRPC APIs for plugins so you can write controllers for Linkerd in any language. - -I am frequently asked how Linkerd configuration should be updated at runtime. In short, I don’t think Linkerd’s *configuration* should have to be updated all that often. If it changes frequently, it’s not configuration; it’s data and deserves its own service abstraction. gRPC will help make my idealized view of the world a practical reality. I can’t wait. 
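Since part of graduating *h2* from `experimental` is simply getting more people routing real traffic with it, here is a minimal sketch of what trying the h2 codec might look like today. This is an illustration only: the `experimental` flag mirrors how other experimental Linkerd features are enabled, the dtab target and ports are placeholders, and the executable name just follows the usual release naming; see the h2 documentation linked at the end of this post for the authoritative options.

```bash
# Illustration only: write a minimal config with an experimental h2 router.
# The dtab target and ports are placeholders for your own environment.
cat > linkerd-h2.yaml <<'EOF'
routers:
- protocol: h2
  experimental: true
  dtab: /svc => /$/inet/127.1/8888
  servers:
  - port: 4142
EOF

./linkerd-0.8.5-exec linkerd-h2.yaml
```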
- -## IN SUMMARY… - -I’m thrilled that Linkerd is embracing foundational technologies of the future like HTTP/2 and gRPC. - -This was challenging work, but we’re fortunate to be building on an outstanding framework (and community!) provided by [Finagle](http://finagle.github.io/) & [Netty](http://netty.io/). We’re also lucky that the Linkerd community is full of thoughtful users who are eager to test new features and provide feedback. Thanks especially to [@mosesn](https://github.com/mosesn) and the Finagle team, [@normanmaurer](https://github.com/normanmaurer) and the Netty team, and to Linkerd users like [@irachex](https://github.com/irachex), [@markeijsermans](https://github.com/markeijsermans), [@moderation](https://github.com/moderation), [@pinak](https://github.com/pinak), [@stvndall](https://github.com/stvndall), [@zackangelo](https://github.com/zackangelo) (and anyone else) who gave us early feedback. - -And our work isn’t finished. We need your help [testing *h2*](https://linkerd.io/config/0.8.5/linkerd/index.html#http-2-protocol). Also, let me take this opportunity to invite you to [contribute to Linkerd on Github](https://github.com/linkerd/linkerd/labels/help%20wanted). At the very least, you should feel free to [join us on Slack](https://slack.linkerd.io/)! diff --git a/linkerd.io/content/blog/introducing-conduit.md b/linkerd.io/content/blog/introducing-conduit.md deleted file mode 100644 index 34d567ae33..0000000000 --- a/linkerd.io/content/blog/introducing-conduit.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -slug: 'introducing-conduit' -title: 'Introducing Conduit' -aliases: - - /2017/12/05/introducing-conduit/ -author: 'william' -date: Tue, 05 Dec 2017 16:00:44 +0000 -draft: false -featured: false -thumbnail: /uploads/conduit_introducing_conduit_featured.png -tags: - [ - Buoyant, - buoyant, - conduit, - Conduit, - Industry Perspectives, - News, - Release Notes, - ] ---- - -Conduit is now part of Linkerd! [Read more >]({{< relref "conduit-0-5-and-the-future" >}}) - -Today, we’re very happy to introduce [Conduit](http://conduit.io), our new open source service mesh for Kubernetes. - -We’ve built Conduit from the ground up to be the fastest, lightest, simplest, and most secure service mesh in the world. It features an incredibly fast and safe data plane written in [Rust](https://www.rust-lang.org/), a simple yet powerful control plane written in [Go](https://golang.org/), and a design that’s focused on performance, security, and _usability_. Most importantly, Conduit incorporates the many lessons we’ve learned from over 18 months of production service mesh experience with [Linkerd](https://linkerd.io). - -Why build Conduit? Linkerd is the most widely deployed production service mesh in the world. It introduced the term “service mesh”, spawning a whole new category of software infrastructure, and has powered trillions of production requests across the globe at companies like Salesforce, Paypal, Expedia, AOL, and Monzo. Throughout this time, we’ve been deeply involved with our customers and users—we’ve sat in their meetings, we’ve built joint roadmaps, and we’ve woken up with them at 3am to firefight. We’ve learned an incredible amount about what happens when open source infrastructure meets the real world. - -One thing we’ve learned is that there are deployment models where Linkerd’s resource footprint is simply too high. 
While Linkerd’s building blocks---widely-adopted, production-tested components like Finagle, Netty, Scala, and the JVM---allow Linkerd to scale _up_ to incredibly high workloads when given lots of CPU and RAM, they aren’t designed to scale _down_ to environments that have limited resources---in particular, to sidecar-based Kubernetes deployments. So, earlier this year, we asked ourselves: if we could build the ideal service mesh, focused on ultra-low-resource environments, but _with_ the benefit of everything we’ve learned from 18 months of production service mesh experience---what would we build? - -The answer is Conduit. Conduit is a next-generation service mesh that makes microservices safe and reliable. Just like Linkerd, it does this by transparently managing the runtime communication _between_ services, automatically providing features for observability, reliability, security, and flexibility. And just like Linkerd, it’s deployed as a data plane of lightweight proxies that run alongside application code, and a control plane of highly available controller processes. Unlike Linkerd, however, Conduit is explicitly designed for low-resource sidecar deployments in Kubernetes. - -## So what makes Conduit so great? - -### **Blazingly fast and lightweight** - -A single Conduit proxy has a sub-millisecond p99 latency and runs with less than 10MB of RSS. - -### **Built for security** - -From Rust’s memory safety guarantees to TLS by default, we’re focused on making sure Conduit is designed with security in mind from the very beginning. - -### **Minimalist** - -Conduit’s feature set is designed to be as minimal and as composable as possible, while allowing customization through gRPC plugins. - -### **Incredibly powerful** - -From built-in aggregated service metrics to a powerful CLI to features like _tap_ (think “tcpdump for microservices”), Conduit gives operators some new and incredibly powerful tools to run microservices in production. - -We’ve been hard at work on Conduit for the past 6 months. We’ve hired incredible people like [Phil](http://philcalcado.com/), [Carl](https://github.com/carllerche), [Sean](http://seanmonstar.com), and [Brian](https://briansmith.org). We’ve invested in core technologies like [Tokio](https://github.com/tokio-rs/tokio) and [Tower](http://github.com/tower-rs/tower) that make Conduit extremely fast without sacrificing safety. Most importantly, we’ve designed Conduit to solve real-world problems based on all we’ve learned from our Linkerd community. - -## What does this mean for Linkerd? - -In short, very little. Linkerd is the most widely adopted production service mesh in the world, and it won’t be going anywhere. We’ll continue to develop, maintain, and provide commercial support for Linkerd, and we’re committed to ensuring that our many production Linkerd users remain happy campers. - -Conduit is not Linkerd 2.0. Conduit targets a very specific environment—Kubernetes—and does not address any of the wide variety of platforms or integration use cases supported by Linkerd. For our many users of ECS, Consul, Mesos, ZooKeeper, Nomad, Rancher, or hybrid and multi-environment setups spanning these systems, Linkerd is the best service mesh solution for you today, and we’ll continue to invest in making it even better. - -## Try it now! - -We’ve just released Conduit 0.1. [Try it here](https://conduit.io)! This is an alpha release—so early that it only supports HTTP/2 (i.e. doesn’t even support HTTP/1.1).
That said, we wanted to get it out the door so that early adopters and enthusiasts could start experimenting with it, and because we want _your_ input on how to make Conduit work for you. - -Over the next few months, we’ll be aggressively working toward making Conduit ready for production, and in 0.2, targeted for early next year, we’ll add support for HTTP/1.1 and TCP. (We've published [the Conduit roadmap here](https://conduit.io/roadmap/)). We’ll be very public in our progress and the goals we’re setting for the project. Finally, we’ll also offer commercial support for Conduit—if this interests you, please [reach out to us](mailto:hello@buoyant.io) directly. - -Want to learn more? Subscribe to the release announcements mailing list, join us in [the #Conduit channel in Linkerd Slack](http://slack.linkerd.io), follow [@runconduit](https://twitter.com/runconduit) on Twitter for updates and news, or find us on [GitHub](https://github.com/runconduit). - -Conduit is open source and licensed under Apache 2.0. - -(And, of course, we’re hiring! If you think Conduit and Linkerd are the coolest things since time-sliced multitasking, [take a gander at our careers page](http://buoyant.io/careers) and drop us a note!) diff --git a/linkerd.io/content/blog/introducing-linkerd-tcp.md b/linkerd.io/content/blog/introducing-linkerd-tcp.md deleted file mode 100644 index 4ca2446759..0000000000 --- a/linkerd.io/content/blog/introducing-linkerd-tcp.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -slug: 'introducing-linkerd-tcp' -title: 'Introducing Linkerd-tcp' -aliases: - - /2017/03/29/introducing-linkerd-tcp/ -author: 'oliver' -date: Wed, 29 Mar 2017 23:32:27 +0000 -thumbnail: /uploads/linkerd_tcp_featured.png -draft: false -featured: false -tags: [Linkerd, linkerd, News] ---- - -Yesterday, at [Kubecon EU](http://events17.linuxfoundation.org/events/kubecon-and-cloudnativecon-europe), I announced an exciting new project in the Linkerd family: [Linkerd-tcp](https://github.com/linkerd/linkerd-tcp). Linkerd-tcp is a lightweight, service-discovery-aware, TLS-ing TCP load balancer that integrates directly with the existing Linkerd service mesh ecosystem. It’s small, fast, and secure—and, like Linkerd itself, integrates with a wide variety of service discovery and orchestration systems including Kubernetes, DC/OS, and Consul. - -Since Linkerd’s introduction in February 2016, we’ve relied on [Finagle](https://twitter.github.io/finagle/)’s production-tested logic for handling the hard parts of *service resilience*: features like circuit breaking, retries, deadlines, request load balancing, and service discovery. These techniques are difficult to implement and can interact with each other in complex and subtle ways. Building Linkerd on a solid, production-tested framework was incredibly important for us, especially for a young project. - -For some use cases, however, Finagle’s broad suite of request-level resilience features are not necessary. Sometimes it’s sufficient to simply proxy TCP. In these situations, the service mesh model still makes sense, as does the need to integrate with service discovery and orchestration systems, but the relative weight of the JVM (as brought in via Finagle) is excessive. - -Enter [Linkerd-tcp](https://github.com/linkerd/linkerd-tcp): a lightweight, service-discovery-aware, TLS-ing TCP load balancer that integrates directly with the existing Linkerd service mesh ecosystem. 
Out of the box, Linkerd-tcp interoperates with [Namerd](https://linkerd.io/in-depth/namerd/) to support Kubernetes, Marathon, Consul, etcd, and ZooKeeper for service discovery. Like Linkerd, it features runtime control over routing policy and highly-granular metrics reporting. Linkerd-tcp’s configuration is very similar to that of Linkerd, and the two are designed to happily coexist (for example, they can, and should, share a Namerd deployment). - -If you’re interested in a TCP proxy that: - -- can read from the Kubernetes API (or Consul, etcd, Marathon, ZooKeeper, etc.); -- has excellent TLS support, with (future work!) all the bells and whistles; -- is highly instrumented and exports directly to Prometheus; -- can run alongside Linkerd and Namerd; -- is small, fast, and light; - -…then Linkerd-tcp is for you! - -We’re incredibly excited about Linkerd-tcp and the opportunities it represents for the service mesh model. *Linkerd-tcp is currently in beta*, so we would love *your* help—please test, write bug reports, contribute pull requests, and generally use the heck out of it! - -## RUST - -Linkerd-tcp is written in [Rust](https://www.rust-lang.org/), using the excellent [tokio](https://github.com/tokio-rs/tokio) library. We’ve been watching Rust for a long time, and while it’s still young, we feel that the language and ecosystem have reached the point where we are comfortable supporting a project of this nature. - -In many ways, Rust is a perfect language for us. As Scala programmers, Rust’s excellent strong typing and functional programming aspects are immediately appealing. As Finagle programmers, tokio is a natural transition, and supports (explicitly!) much of the same programming model that makes Finagle great. Finally, as systems programmers, we’re thrilled to be able to build native binaries with tiny resource footprints, while not worrying about buffer overruns, RCEs, and problems that can plague traditional systems languages. - -## Downloading and compiling - -The [Linkerd-tcp source code is on GitHub](https://github.com/linkerd/linkerd-tcp). See the [README](https://github.com/linkerd/linkerd-tcp/blob/master/README.md#quickstart) for information about compiling and running. We’ve also provided a [Linkerd-tcp Docker image](https://hub.docker.com/r/linkerd/linkerd-tcp/). Below, we’ll use this image to walk through an example of how you can use it in your own environment. - -## Running the demo - -To demonstrate the capabilities of running Linkerd-tcp with Namerd, we’ve set up a [Linkerd-tcp demo project](https://github.com/linkerd/linkerd-examples/tree/master/linkerd-tcp) in the [linkerd-examples repo](https://github.com/linkerd/linkerd-examples). In the demo, we’re using Linkerd to route incoming HTTP traffic to a cluster of Go web servers. The web servers cache their results in Redis, and they communicate with Redis via Linkerd-tcp. All routing policy for both HTTP requests and Redis requests is stored in Namerd. You can run the demo yourself using docker-compose: - -```bash -git clone https://github.com/linkerd/linkerd-examples.git && \ - cd linkerd-examples/linkerd-tcp && \ - docker-compose build && docker-compose up -d -``` - -### VISIBILITY - -Linkerd and Linkerd-tcp are both configured to export metrics to [Prometheus](https://prometheus.io/), which provides visibility across all of the backends in our setup. As part of the demo, we’ve collected the metrics into a [Grafana](https://grafana.com/) dashboard, which displays the Linkerd and Linkerd-tcp metrics side by side.
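(The demo’s docker-compose setup wires this up for you. Conceptually, the Prometheus side is just a scrape config pointing at the two proxies’ metrics endpoints; the sketch below is illustrative only, with placeholder job names and addresses, and it assumes Linkerd’s Prometheus-format metrics are exposed via the `/admin/metrics/prometheus` path on its admin port.)

```bash
# Illustrative sketch of a Prometheus scrape config for this kind of setup.
# Hostnames and ports are placeholders; adjust them to your own deployment.
cat > prometheus.yml <<'EOF'
scrape_configs:
  - job_name: linkerd
    metrics_path: /admin/metrics/prometheus
    static_configs:
      - targets: ['linkerd:9990']
  - job_name: linkerd-tcp
    static_configs:
      - targets: ['linkerd-tcp:9989']   # placeholder metrics address
EOF
```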
To view the dashboard, go to port 3000 on your Docker host. It will look like this: - -{{< fig - alt="dashboard" - title="Dashboard" - src="/uploads/2017/07/buoyant-linkerd-tcp-dashboard-1-large-1024x692.png" >}} - -The first row of the dashboard displays Linkerd stats. Since Linkerd is a layer 7 router, operating at the request level, it’s able to export protocol-specific information about the HTTP requests that it routes, including overall request count and per-request latency. - -The second row of the dashboard displays Linkerd-tcp stats. Since Linkerd-tcp is a layer 3 / 4 proxy, it exports connection-layer information about the TCP connections it proxies, including the total number of bytes sent, and the number of presently active connections. - -The third row of the dashboard displays stats exported directly from Redis, including the number of commands executed per second and the number of client connections. In this demo we’re running two separate Redis clusters, but we have initially configured Linkerd-tcp via Namerd to send all Redis traffic to the first cluster. By modifying the routing rules in Namerd, we can shift traffic from the first Redis cluster to the second, without restarting any of our components. Let’s do that now. - -### TRAFFIC SHIFTING - -In our service mesh setup, Namerd acts as a global routing policy store, serving the routing rules (called [Dtabs](https://linkerd.io/in-depth/dtabs/)) that both Linkerd and Linkerd-tcp use to route requests. Changing Dtabs in Namerd allows us to reroute traffic in Linkerd and Linkerd-tcp. We can use the [namerctl](https://github.com/linkerd/namerctl) command line utility to make these changes. - -Start by installing namerctl: - -```bash -go get -u github.com/linkerd/namerctl -``` - -namerctl uses [Namerd’s HTTP API](https://linkerd.io/config/0.9.1/namerd/index.html#http-controller), which in our example is running on port 4180 on your Docker host. Configure namerctl to talk to Namerd by setting the `NAMERCTL_BASE_URL` environment variable in your shell: - -```bash -export NAMERCTL_BASE_URL=http://$DOCKER_IP:4180 -``` - -Where `$DOCKER_IP` is the IP of your Docker host. With the environment variable set, we can fetch Namerd’s default routing policy with: - -```bash -$ namerctl dtab get default -# version AAAAAAAAAAE= -/cluster => /#/io.l5d.fs ; -/svc => /cluster ; -/svc/redis => /cluster/redis1 ; -``` - -This Dtab tells Linkerd-tcp to send all Redis requests to the first Redis cluster, which is identified in service discovery as `redis1`. We have a separate service discovery entry for the `redis2` cluster, and we can rewrite our Dtab to instead send traffic there with: - - -```bash -$ namerctl dtab get default | sed 's/redis1/redis2/' > default.dtab -$ namerctl dtab update default default.dtab -Updated default -``` - - -Returning to the Grafana UI, you’ll see that the `redis2` instance is now receiving 100% of traffic from the web service backends, as reported by Linkerd-tcp: - -{{< fig - alt="linkerd-tcp" - title="Linkerd-tcp" - src="/uploads/2017/07/traffic-shifting.png" >}} - -Dtabs are an incredibly powerful system that provide fine-grained control over traffic routing. In this example we’ve only scratched the surface of what they can accomplish. To learn more about Dtabs, see [Dynamic Routing with Namerd](/2016/05/04/real-world-microservices-when-services-stop-playing-well-and-start-getting-real/#dynamic-routing-with-namerd) or check out some of the examples in our Kubernetes Service Mesh series, e.g. 
[Continuous Deployment via Traffic Shifting][part-iv]. - -## Want more? - -This is just the beginning, and we have some very big plans for Linkerd-tcp. Want to get involved? [Linkerd-tcp is on Github](https://github.com/linkerd/linkerd-tcp). And for help with Linkerd-tcp, Dtabs, or anything else about the Linkerd service mesh, feel free to stop by the [Linkerd community Slack](https://slack.linkerd.io/) or post a topic on the [Linkerd Support Forum](https://linkerd.buoyant.io/)! - -[part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} diff --git a/linkerd.io/content/blog/linkerd-0-8-4-released.md b/linkerd.io/content/blog/linkerd-0-8-4-released.md deleted file mode 100644 index b2a4293edd..0000000000 --- a/linkerd.io/content/blog/linkerd-0-8-4-released.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -slug: 'linkerd-0-8-4-released' -title: 'Linkerd 0.8.4 released' -aliases: - - /2016/12/07/linkerd-0-8-4-released/ -author: 'william' -date: Wed, 07 Dec 2016 00:11:05 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_version_084_featured.png -tags: [Linkerd, linkerd, News, Product Announcement] ---- - -We’re happy to announce that we’ve released [linkerd 0.8.4](http://github.com/linkerd/linkerd/releases/tag/0.8.4)! With this release, two important notes. First, Kubernetes and Consul support are now officially production-grade features—high time coming, since they’re actually already used widely in production. Second, this release features some significant improvements to Linkerd’s HTTP/2 and gRPC support, especially around backpressure and request cancellation. - -## KUBERNETES AND CONSUL NAMERS NO LONGER EXPERIMENTAL - -Here at the Buoyant code mines, we tend to be pretty conservative about marking features as “production ready”. Both Kubernetes and Consul namers have had the `experimental` flag since they were introduced to Linkerd many months ago. - -However, since these namers are being used extensively in production by companies and organizations such as [Olark](http://olark.com/), [Monzo](http://monzo.com/), and [NCBI](https://www.ncbi.nlm.nih.gov/), and are free of known bugs and performance issues, it’s time to remove the experimental flag from those namers. - -So, as of Linkerd 0.8.4, Linkerd’s Kubernetes support and Consul support are both officially production-grade. - -## IMPROVED HTTP/2 AND GRPC SUPPORT - -Over the past few releases, [Oliver](https://twitter.com/olix0r) has been working hard on improving Linkerd’s HTTP/2 support. Since Linkerd doesn’t parse the request body, HTTP/2 support also gives us [gRPC support](https://linkerd.io/features/grpc/). - -In 0.8.4, we started testing Linkerd against known-good gRPC clients and servers, including non-Go implementations. As a result of this testing, Linkerd 0.8.4 includes much improved support for HTTP/2 and gRPC, especially around HTTP/2’s backpressure and request cancellation features. - -For now, HTTP/2 and gRPC support remain behind the experimental flag. However, production-ready HTTP/2 and gRPC support are on our short term roadmap, and you should expect to see these features continue to improve over the next few releases. - -We hope you enjoy this release. For more about HTTP/2 or gRPC with Linkerd, feel free to stop by our [Linkerd community Slack](http://slack.linkerd.io/), ask a question on the [Linkerd Support Forum](https://linkerd.buoyant.io/), or [contact us directly](https://linkerd.io/overview/help/). 
- -—William and the gang at [Buoyant](https://buoyant.io/) diff --git a/linkerd.io/content/blog/linkerd-0-9-0-released.md b/linkerd.io/content/blog/linkerd-0-9-0-released.md deleted file mode 100644 index 4680da4f08..0000000000 --- a/linkerd.io/content/blog/linkerd-0-9-0-released.md +++ /dev/null @@ -1,189 +0,0 @@ ---- -slug: 'linkerd-0-9-0-released' -title: 'Linkerd 0.9.0 released' -aliases: - - /2017/02/23/linkerd-0-9-0-released/ -author: 'alex' -date: Thu, 23 Feb 2017 00:24:50 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_version_009_featured.png -tags: [Linkerd, linkerd, News, Product Announcement] ---- - -Today we’re happy to release Linkerd 0.9.0, our best release yet! This release is jam packed with internal efficiency upgrades and major improvements to the admin dashboard. We also took this opportunity to make some backwards incompatible changes to simplify Linkerd configuration. See the bottom of this post for a [detailed guide](#config-upgrade-guide) on what changes you’ll need to make to your config to upgrade from 0.8.\* to 0.9.0. - -You can download Linkerd 0.9.0 from [github](https://github.com/linkerd/linkerd/releases/tag/0.9.0) or find images on [dockerhub](https://hub.docker.com/r/buoyantio/linkerd). To learn about all the great stuff included in 0.9.0, read on! - -## ADMIN DASHBOARD IMPROVEMENTS - -We’ve added some bar charts and stats to the admin dashboard to give you further visibility into the performance of your services: - -- Added a retries stat to the router’s summary, so you can see at a glance if something is wrong with an underlying service, causing Linkerd to retry requests. -- Added a retries bar chart per router which shows the percentage of your configured retry budget that has been used. (The default budget is 20%.) -- Added a client pool health bar, showing the ratio of live endpoints to total endpoints. - -We’ve also made some appearance tweaks to make the dashboard easier to consume: - -- Clients now have a colored border to make them easier to distinguish. -- Long transformer prefixes have been hidden! To see the full label, click on the client name. -- Collapsing a client will hide it from the top client requests graph. - -## SIMPLER LOGICAL NAMES - -Naming and [routing](https://linkerd.io/in-depth/routing/) are some of the most complex aspects to configuring Linkerd, especially for new users. To simplify this, we’re changing the default identifier to produce names like `/svc/foo` rather than `/http/1.1/GET/foo`. These shorter names are easier to understand and to write dtabs for. - -We recommend updating your dtabs to use the simpler `/svc` style names. If you don’t want to do that immediately, the previous default `io.l5d.methodAndHost` identifier can still be [configured explicitly]({{< relref "linkerd-0-9-0-released" >}}#config-upgrade-guide). - -## MORE IDIOMATIC PROMETHEUS METRICS - -We’ve done a lot of work to change the Prometheus metrics output to take advantage of Prometheus’s tags, and to better fit Prometheus metrics naming conventions. As part of this change, the `/admin/metrics/prometheus` endpoint is no longer provided by default. To get this endpoint you need to add the [`io.l5d.prometheus` telemeter](https://linkerd.io/config/0.9.0/linkerd/index.html#prometheus) to your config. - -### COUNTERS AND GAUGES - -Configuration-specific data has moved from metric names to labels. This should minimize the number of metric names while still providing granular breakdowns of metrics across various configurations. 
For example: - -```txt -rt:http:dst:id:_:io_l5d_fs:service1:path:http:1_1:GET:linkerd:4140:requests -``` - -Becomes: - -```txt -rt:dst_id:dst_path:requests{rt="http", dst_id="#/io.l5d.fs/service1", dst_path="svc/linkerd:4140"} -``` - -### HISTOGRAMS AND SUMMARIES - -Prior to 0.9.0, Linkerd exported histograms as collections of gauges with a stat label. Linkerd now exports histograms as Prometheus summaries. For example: - -```txt -rt:http:dst:path:http:1_1:GET:linkerd:4140:request_latency_ms{stat="avg"} -rt:http:dst:path:http:1_1:GET:linkerd:4140:request_latency_ms{stat="count"} -rt:http:dst:path:http:1_1:GET:linkerd:4140:request_latency_ms{stat="max"} -rt:http:dst:path:http:1_1:GET:linkerd:4140:request_latency_ms{stat="min"} -rt:http:dst:path:http:1_1:GET:linkerd:4140:request_latency_ms{stat="p50"} -rt:http:dst:path:http:1_1:GET:linkerd:4140:request_latency_ms{stat="p90"} -... -rt:http:dst:path:http:1_1:GET:linkerd:4140:request_latency_ms{stat="stddev"} -rt:http:dst:path:http:1_1:GET:linkerd:4140:request_latency_ms{stat="sum"} -``` - -Becomes: - -```txt -rt:dst_path:request_latency_ms_avg{rt="http", dst_path="svc/linkerd:4140"} -rt:dst_path:request_latency_ms_count{rt="http", dst_path="svc/linkerd:4140"} -rt:dst_path:request_latency_ms_sum{rt="http", dst_path="svc/linkerd:4140"} -rt:dst_path:request_latency_ms{rt="http", dst_path="svc/linkerd:4140", quantile="0"} -rt:dst_path:request_latency_ms{rt="http", dst_path="svc/linkerd:4140", quantile="0.5"} -rt:dst_path:request_latency_ms{rt="http", dst_path="svc/linkerd:4140", quantile="0.9"} -... -rt:dst_path:request_latency_ms{rt="http", dst_path="svc/linkerd:4140", quantile="1"} -``` - -## USAGE REPORTING - -To continue our never-ending quest to improve Linkerd, we need a broad picture of how users are running it. To this end, 0.9.0 includes some basic anonymized usage reporting. We’ve been careful to capture only non-identifying information, and to make it easy for you to disable this feature. Linkerd captures: - -- How it is configured (kinds of namers, initializers, identifiers, transformers, protocols, & interpreters used) -- What environments it is running in (OS, orchestrator, etc.) -- Performance metrics - -It does not capture the labels of namers/routers, designated service addresses or directories, dtabs, or any request or response data. To review the payload reported at any point, visit `:9990/admin/metrics/usage`. To disable reporting, simply set `enabled: false` in your Linkerd config under the top-level `usage:` section: - -```yml -usage: - enabled: false -``` - -You can also optionally provide an organization ID string that will help us to identify your organization if you so choose: - -```yml -usage: - orgId: my-org -``` - -## CONFIG UPGRADE GUIDE - -Follow these steps to upgrade a 0.8.\* config into one that will work with 0.9.0: - -- The `baseDtab` field has been renamed to just `dtab`. - -FROM - -```txt -routers: -- protocol: http - baseDtab: / => /#/io.l5d.k8s/default/http; -``` - -TO - -```txt -routers: -- protocol: http - dtab: / => /#/io.l5d.k8s/default/http; -``` - -- The `io.l5d.commonMetrics` telemeter no longer exists and should be removed from configs. Metrics will continue to be served on `/admin/metrics.json` without requiring that they be enabled via the `io.l5d.commonMetrics` telemeter. -- The `tracers` section has been removed in favor of the `telemetry` section. The only tracer that was previously provided was the `io.l5d.zipkin` tracer.
That configuration can be moved directly to the `telemetry` section; none of its configuration options have changed. - -FROM - -```txt -tracers: -- kind: io.l5d.zipkin - sampleRate: 1.0 -``` - -TO - -```txt -telemetry: -- kind: io.l5d.zipkin - sampleRate: 1.0 -``` - -- The default `dstPrefix` has changed from the protocol name (e.g. `/http`, `/thrift`, etc.) to simply `/svc`. To get the old behavior you'll need to manually set the `dstPrefix` to the protocol name, e.g. `dstPrefix: /http`. Alternatively, update your dtab to expect names starting with `/svc` instead of with the protocol name. E.g. replace `/http/foo => ...` with `/svc/foo => ...`. - -- The default HTTP identifier has changed from `io.l5d.methodAndHost` to `io.l5d.header.token`. To get the old behavior you'll need to manually set the identifier to `io.l5d.methodAndHost`. Alternatively, update your dtab to expect names of the form `/svc/` instead of `/http/1.1//`. See the section above on [Simpler Logical Names](#simpler-logical-names) to learn about the motivation for these two changes. - - FROM - - ```yml - routers: - - protocol: http - baseDtab: | - /srv => /#/io.l5d.k8s/default/http; - /http/*/* => /srv; - ``` - - TO - -```yml -routers: - - protocol: http - dtab: | - /srv => /#/io.l5d.k8s/default/http; - /svc => /srv; -``` - -OR - -```yml -routers: - - protocol: http - identifier: - kind: io.l5d.methodAndHost - dstPrefix: /http - dtab: | - /srv => /#/io.l5d.k8s/default/http; - /http/*/* => /srv; -``` - -## THANKS! - -Thank you to all of our users, the wonderful Linkerd community, and an extra special thank you to [Borys Pierov](https://twitter.com/Ashald), [Mark Eijsermans](https://twitter.com/markeijsermans), and [Don Petersen](https://github.com/dpetersen) for their contributions to this release. - -If you run into any issues whatsoever porting your Linkerd setup to use 0.9.0, don’t hesitate to hop into the [Linkerd community Slack](http://slack.linkerd.io/), and we’ll help you get it sorted out. diff --git a/linkerd.io/content/blog/linkerd-1-2-0-announcement-migration-guide.md b/linkerd.io/content/blog/linkerd-1-2-0-announcement-migration-guide.md deleted file mode 100644 index 47f33210d6..0000000000 --- a/linkerd.io/content/blog/linkerd-1-2-0-announcement-migration-guide.md +++ /dev/null @@ -1,152 +0,0 @@ ---- -slug: 'linkerd-1-2-0-announcement-migration-guide' -title: 'Linkerd 1.2.0 is here! Features, bugfixes, and migration' -aliases: - - /2017/09/08/linkerd-1-2-0-announcement-migration-guide/ -author: 'eliza' -date: Fri, 08 Sep 2017 20:23:01 +0000 -thumbnail: /uploads/linkerd_version_12_featured.png -draft: false -featured: false -tags: [Linkerd, linkerd, News] ---- - -We're very excited to announce [Linkerd](https://github.com/linkerd/linkerd/releases/tag/1.2.0) [version 1.2.0](https://github.com/linkerd/linkerd/releases/tag/1.2.0)! This is a huge release with a lot of new features, fixes, and performance improvements, especially for our users running Linkerd with Kubernetes, HTTP/2, or gRPC. There are also a handful of breaking changes in 1.2.0, so we've included a migration guide below to help make the transition as easy as possible. - -As usual, release artifacts are available [on GitHub](https://github.com/linkerd/linkerd/releases/tag/1.2.0), and Docker images are available [on Docker Hub](https://hub.docker.com/r/buoyantio/linkerd/). - -## Community Contributors - -We'd like to take a moment to highlight contributions from Linkerd community members in this release.
- -- Christopher Taylor ([@ccmtaylor](https://github.com/ccmtaylor)): added DNS SRV record support -- Andrew Wright (@blacktoe): improved Consul health checks -- Cyril Ponomaryov ([@cponomaryov](https://github.com/cponomaryov)): fixed an issue in the `config.json` admin endpoint, and made some performance improvements in logging -- Marcin Mejran ([@mejran](https://github.com/mejran)): fixed a memory leak in `JsonStreamParser` - -In addition to contributions from the community, this release also contains the first contributions from [Phil Calçado](http://philcalcado.com/2017/08/09/buoyant.html), the newest member of the Linkerd engineering team. - -## New Features in 1.2.0 - -### New DNS SRV Record support - -Thanks to Christopher Taylor ([@ccmtaylor](https://github.com/ccmtaylor)) at SoundCloud, Linkerd 1.2.0 features a new `io.l5d.dnssrv` namer that allows Linkerd to use DNS SRV records for service discovery. - -An example configuration for the DNS SRV namer might look like this: - -```yml -namers: - - kind: io.l5d.dnssrv - experimental: true - refreshIntervalSeconds: 5 - dnsHosts: - - ns0.example.org - - ns1.example.org -``` - -The `dnsHosts` configuration key specifies a list of DNS servers against which to perform SRV lookups, while the `refreshIntervalSeconds` key specifies the frequency of lookups. Please note that this namer is still considered experimental, so `experimental: true` is required. Once the DNS SRV namer is configured, it can be referenced in the dtab to use it: - -```yml -dtab: | - /dnssrv => /#/io.l5d.dnssrv - /svc/myservice => - /dnssrv/myservice.srv.example.org & - /dnssrv/myservice2.srv.example.org; - /svc/other => - /dnssrv/other.srv.example.org; -``` - -Please see the DNS SRV namer [documentation](https://linkerd.io/config/head/linkerd/index.html#dns-srv-records) for more information. - -### Improved Consul health filtering - -Another new feature added by an open-source contributor is support for filtering by Consul health states, added by Linkerd user Andrew Wright (@blacktoe). Consul has a concept of `passing`, `warning` and `critical` health statuses, but the Consul namer previously only supported filtering nodes by a binary health status. To use this feature, add the following to the Consul namer configuration: - -```yml -useHealthCheck: true -healthStatuses: - - 'passing' - - 'warning' -``` - -Where `healthStatuses` is a list of statuses to filter on. Refer to the [documentation](https://linkerd.io/config/1.2.0/linkerd/index.html#consul-configuration) for the Consul namer for more information. In addition, we've made the Consul namer more robust: Consul errors will now cause the namer to fall back to the last good state observed from Consul, and the log messages on these errors have been made more informative. - -### New Kubernetes ConfigMap Interpreter - -Users running Linkerd on Kubernetes may be interested in the new `io.l5d.k8s.configMap` interpreter (marked experimental for now, until it sees more production use). This interpreter will interpret names using a dtab stored in a Kubernetes ConfigMap, and update the dtab if the ConfigMap changes, allowing users on Kubernetes to implement dynamic routing rule changes without running Namerd. - -An example configuration is as follows: - -```yml -routers: -- ... 
- interpreter: -    kind: io.l5d.k8s.configMap -    experimental: true -    namespace: ns -    name: dtabs -    filename: my-dtab -``` - -The `namespace` configuration key refers to the name of the Kubernetes namespace where the ConfigMap is stored, while the `name` key refers to the name of the ConfigMap object, and the `filename` key refers to the name of the dtab within the ConfigMap. As this interpreter is still experimental, `experimental: true` must be set for it to be used. - -See the [ConfigMap namer documentation](https://linkerd.io/config/1.2.0/linkerd/index.html#kubernetes-configmap) for more information. - -### Improved Ingress Identifier Configurability (Istio) - -For users running Linkerd-powered Istio deployments, Linkerd 1.2.0 allows multiple simultaneous Ingress controllers by [configuring which annotation class each Ingress controller uses](https://github.com/linkerd/linkerd/blob/master/linkerd/docs/protocol-http.md). - -## Bug Fixes and Performance Improvements - -### Improved Kubernetes watches, bugfixes, and performance - -This release features [a major refactoring](https://github.com/linkerd/linkerd/pull/1603) of the `io.l5d.k8s` and `io.l5d.k8s.ns` namers. We've rewritten how these namers watch Kubernetes API objects. The new code should be much more efficient, leading to major performance improvements. We've also fixed issues where Linkerd would continue routing to Kubernetes services that had been deleted, and some minor routing problems in the ingress controller. Finally, we'd like to thank community member Marcin Mejran ([@mejran](https://github.com/mejran)), who fixed a memory leak in JSON stream parsing which could impact Kubernetes performance. - -### HTTP/2 and gRPC fixes - -For users of the HTTP/2 protocol, we've solved an issue where long-running streams would eventually stop receiving new frames, and we've fixed a memory leak in long-running streams. - -## Breaking Changes and Migration - -### Removed Support for PKCS#1 Keys - -Linkerd 1.2.0 [removes support](https://github.com/linkerd/linkerd/pull/1590) for Public-Key Cryptography Standards #1 (PKCS#1) SSL private keys, which were previously deprecated. If you still have keys in PKCS#1 format, you will need to convert your private keys to PKCS#8. Private keys can be converted with the following command: - -```bash -openssl pkcs8 -topk8 -nocrypt -in $PKCS1.pem -out $PKCS8.pk8 -``` - -where `$PKCS1` and `$PKCS8` are the file names of the old PKCS#1 key and the new key to output, respectively. If you see errors with messages containing “file does not contain valid private key”, you'll know you need to do this step. For example, the message: - -```txt -WARN 0908 14:03:38.201 CDT finagle/netty4-6: Failed to initialize a channel. Closing: [id: 0xdd6c26dd] -java.lang.IllegalArgumentException: File does not contain valid private key: finagle/h2/src/e2e/resources/linkerd-tls-e2e-key-pkcs1.pem -``` - -indicates that the key `linkerd-tls-e2e-key-pkcs1.pem` needs to be updated to PKCS#8 format. - -#### Client TLS Configuration - -Linkerd now rejects client TLS configurations which contain both `disableValidation: true` and a `clientAuth` configuration, as disabling validation will cause Linkerd to use the JDK SSL provider, which does not support client authorization. These configurations have always been incompatible, and including both would have previously caused errors at runtime.
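To make the incompatibility concrete, here is a rough sketch of the kind of client TLS block that 1.2.0 will now reject at startup. This is an illustration only: the common name and certificate paths are made-up placeholders, and the exact field layout should be checked against the client TLS documentation for your Linkerd version rather than taken from this sketch.

```yml
routers:
  - protocol: http
    client:
      tls:
        commonName: myservice.example.com  # hypothetical common name
        disableValidation: true            # disabling validation forces the JDK SSL provider
        clientAuth:                        # client auth is not supported by the JDK provider
          certPath: /certs/client.pem      # hypothetical certificate path
          keyPath: /certs/client-key.pem   # hypothetical key path
```

Dropping either `disableValidation: true` or the `clientAuth` block, whichever your deployment does not actually need, brings the configuration back in line with what 1.2.0 accepts.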
- -#### Admin and Interpreter Server Configuration - -For improved security, by default Linkerd and Namerd 1.2.0 now serve the admin page, metrics, `io.l5d.mesh`, and `io.l5d.thriftNameInterpreter` only on 127.0.0.1. (Previously, they were bound to every available network interface.) This means that accessing the admin and metrics interfaces from an external IP address will no longer work. If you need to access the admin or metrics pages from an external IP address, you will need to add - -```yml -admin: -  ip: 0.0.0.0 -  port: 9990 -``` - -to your configuration file. - -#### StatsD Telemeter Deprecation - -The StatsD telemeter (`io.l5d.statsd`) [is now deprecated](https://discourse.linkerd.io/t/deprecating-the-statsd-telemeter/268/1), and will log a warning on use. We've been considering deprecating this telemeter for some time, as it doesn't work the way most users expect and can lead to loss of data and/or greatly increased Linkerd latency. We recommend that users of this telemeter migrate to the InfluxDB telemeter in conjunction with Telegraf. - -In future releases, we will remove this telemeter. - -### Further Information - -The complete changelog for this release is available [on GitHub](https://github.com/linkerd/linkerd/blob/master/CHANGES.md#120-2017-09-07), and updated documentation can be found on [linkerd.io/docs](https://linkerd.io/config/1.2.0/linkerd/index.html). And, as always, if you have any questions or just want to chat about Linkerd, join [the Linkerd Slack](http://slack.linkerd.io/) or browse [the Discourse community forum](https://discourse.linkerd.io) for more in-depth discussion. diff --git a/linkerd.io/content/blog/linkerd-1-3-3-announcement-community-spotlight.md b/linkerd.io/content/blog/linkerd-1-3-3-announcement-community-spotlight.md deleted file mode 100644 index 171781b111..0000000000 --- a/linkerd.io/content/blog/linkerd-1-3-3-announcement-community-spotlight.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -slug: 'linkerd-1-3-3-announcement-community-spotlight' -title: 'Linkerd 1.3.3 Announcement and Community Spotlight' -aliases: - - /2017/12/01/linkerd-1-3-3-announcement-community-spotlight/ -author: 'eliza' -date: Fri, 01 Dec 2017 15:39:26 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_version_133_featured.png -tags: [Buoyant, Community, Linkerd, linkerd, Release Notes] ---- - -Today we’re happy to announce the release of [Linkerd version 1.3.3](https://github.com/linkerd/linkerd/releases/tag/1.3.3), with a number of bug fixes and performance improvements. - -Many Linkerd production users will be especially pleased to see that this release includes fixes for two slow memory leaks that can occur in some conditions. - -Since many of the fixes in this release were written by members of the Linkerd community, we wanted to spotlight the work of open source contributors. - -## Kubernetes AsyncStream Memory Leak - -One of the biggest community contributions in 1.3.3 is Linkerd pull request [#1714](https://github.com/linkerd/linkerd/pull/1714), written by Sergey Grankin ([@sgrankin](https://github.com/sgrankin) on GitHub). Sergey found and [reported](https://github.com/linkerd/linkerd/issues/1694) a slow leak which occurred while using Linkerd as a Kubernetes ingress controller, investigated the issue, and submitted a patch in short order.
- -This graph of Linkerd memory usage in our test environment displays the dramatic impact of Sergey’s change: - -{{< fig - alt="Linkerd Memory Usage Graph" - title="Linkerd Memory Usage Graph" - src="/uploads/2018/05/usage.png" >}} - -The orange line shows the memory used by Linkerd 1.3.2, while the green line is memory usage after deploying a build of Sergey’s branch. - -Thanks, Sergey! - -## Netty4 ByteBuf Memory Leak - -1.3.3 also fixes a memory leak caused by incorrect reference counting on a Netty 4 ByteBuf ([#1690](https://github.com/linkerd/linkerd/issues/1690)). Thank you to Linkerd users Zack Angelo ([@zackangelo](https://github.com/zackangelo)) and Steve Campbell ([@DukeyToo](https://github.com/dukeytoo)), who were extremely helpful with reporting and investigating this issue and with validating the fix. Thanks also to Matt Freels ([@freels](https://github.com/freels)) for his help debugging. - -## DNS Namer Record Updating Issues - -Linkerd user Carlos Zuluaga ([@carloszuluaga](https://github.com/carloszuluaga)) submitted a pull request fixing an issue where the DNS SRV record namer failed to update after changes in DNS records ([#1718](https://github.com/linkerd/linkerd/issues/1718)). In addition to being Carlos’ first contribution to the project (welcome, Carlos!), this contribution is noteworthy in that the SRV record namer is an entirely community-contributed component. We’d also like to thank the namer’s original author, Chris Taylor ([@ccmtaylor](https://github.com/ccmtaylor)), for taking such an active role in his contribution’s ongoing maintenance. - -## Namer Plugin Admin UI Fix - -Finally, we’d like to thank another first-time Linkerd contributor, Robert Panzer ([@robertpanzer](https://github.com/robertpanzer)). Robert found and fixed an issue where UI elements added by custom namer plugins were not added to the admin UI ([#1716](https://github.com/linkerd/linkerd/issues/1716)). Linkerd’s plugin interface allows plugins to add nav items and handlers to the admin web UI, but due to an error in the function that registers plugins, these UI items were never actually added to the admin page. Thanks Robert! - -## The Linkerd Community is Amazing - -As always, we’re humbled and gratified to have such a strong open source community around Linkerd. Thanks again to Robert, Carlos, Zack, Steve, Chris, Matt, and Sergey. For a first-hand view into just how helpful the community around Linkerd can be, please join us in the [Linkerd Slack](http://slack.linkerd.io) or on the [Linkerd Support Forum](https://linkerd.buoyant.io/)! 
diff --git a/linkerd.io/content/blog/linkerd-dcos-microservices-in-production-made-easy.md b/linkerd.io/content/blog/linkerd-dcos-microservices-in-production-made-easy.md deleted file mode 100644 index 3891b1600a..0000000000 --- a/linkerd.io/content/blog/linkerd-dcos-microservices-in-production-made-easy.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -slug: 'linkerd-dcos-microservices-in-production-made-easy' -title: 'Linkerd on DC/OS: Microservices in Production Made Easy' -aliases: - - /2016/04/19/linkerd-dcos-microservices-in-production-made-easy/ -author: 'william' -date: Tue, 19 Apr 2016 22:21:14 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_featured_EASY.png -tags: [buoyant, Linkerd, linkerd, News, Product Announcement] ---- - -As part of the [launch of DC/OS](http://dcos.io/), the open source Mesos ecosystem from Mesosphere, Buoyant, and dozens of other companies, we’re happy to announce the launch of [Linkerd](https://linkerd.io/) for DC/OS! - -As of today, you can install Linkerd and its companion project, namerd, on your DC/OS cluster with two simple commands: - -```bash -dcos package install linkerd -dcos package install namerd -``` - -Together, Linkerd and namerd form Buoyant’s open source *service mesh* for cloud-native applications. If you’re running software on DC/OS, you’ll get global, programmable control of all HTTP or RPC traffic across your entire application. For devops and SRE teams, this means you’ll be able to gracefully handle service failures and degradations on the fly by failing over across service clusters or shifting traffic to other regions. You’ll also be able to take advantage of powerful new mechanisms for testing the production readiness of new code *before* it serves production traffic. - -While microservices provide many benefits to application scalability and reliability, they also bring new challenges for SRE teams tasked with ensuring reliability and performance. Together, DC/OS and Linkerd directly address these challenges. Mesos automatically handles hardware failure and software crashes, providing the fundamental building blocks around component resilience. Linkerd broadens those semantics to the service level, allowing you to automatically shift traffic away from slow, overloaded, or failing instances. - -{{< fig -  src="/uploads/2016/04/routing-diagram.png" -  alt="Traffic routing decouples the deployment topology" -  title="Traffic routing decouples the deployment topology" >}} - -Linkerd also gives you powerful new mechanisms around *traffic routing*, or runtime control of the HTTP or RPC traffic within an application. By decoupling your application’s deployment topology from its traffic-serving topology, Linkerd makes blue-green deploys, staging, canarying, proxy injection, and pre-production environments easy—even when the services are deep within the application topology. - -In the demo below, we’ll show you how to do a simple, percentage-based blue-green deploy in a microservice application running on DC/OS, and we’ll throw in a cross-service failover for good measure. - -{{< youtube 3fV7v1gyYms >}} - -Together, Linkerd and DC/OS make it incredibly easy to turn a collection of containers into a resilient, scalable, and operable microservice architecture. Best of all, much like DC/OS, Linkerd is built on top of open source technology that powers companies like Twitter, Pinterest, SoundCloud, and ING Bank.
With Linkerd on DC/OS, you’ll be able to build your microservices on top of strong open source foundations, and proven, production-tested implementations. diff --git a/linkerd.io/content/blog/linkerd-distributed-tracing.md b/linkerd.io/content/blog/linkerd-distributed-tracing.md index 285f34b5b6..656562ed8b 100644 --- a/linkerd.io/content/blog/linkerd-distributed-tracing.md +++ b/linkerd.io/content/blog/linkerd-distributed-tracing.md @@ -20,8 +20,8 @@ recommendation on the best way to make use of distributed tracing with Linkerd. Tracing can be an invaluable tool in debugging distributed systems performance, especially for identifying bottlenecks and understanding the latency cost of each component in your system. If you're not already familiar with the idea -behind distributed tracing, [Distributed Tracing for Polyglot -Microservices](/2016/05/17/distributed-tracing-for-polyglot-microservices/) +behind distributed tracing, the [OpenTelemetry Observability +Primer](https://opentelemetry.io/docs/concepts/observability-primer/#what-is-observability) gives a good overview of the concepts. The promises of distributed tracing are exciting, but in our experience, there are significant barriers to achieving those promises in practice. diff --git a/linkerd.io/content/blog/linkerd-expands-community-roles-second-birthday.md b/linkerd.io/content/blog/linkerd-expands-community-roles-second-birthday.md deleted file mode 100644 index d637a3f528..0000000000 --- a/linkerd.io/content/blog/linkerd-expands-community-roles-second-birthday.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -slug: 'linkerd-expands-community-roles-second-birthday' -title: 'Linkerd expands community roles for its second birthday' -aliases: - - /2018/02/16/linkerd-expands-community-roles-second-birthday/ -author: 'gmiranda23' -date: Fri, 16 Feb 2018 22:17:40 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_faster_featured.png -tags: [buoyant, Community, Linkerd, linkerd, News] ---- - -Looking back, it's hard to believe that when [Linkerd 0.1.0 was released](https://buoyant.io/2016/02/18/linkerd-twitter-style-operability-for-microservices/) two years ago, the "service mesh" category of tools was instantly born and the world suddenly changed. It's hard to believe because that's not quite how it happened. - -Back then, our goal was simple: bring the power of Finagle---the system that had transformed Twitter from a failing monolithic app to a massive microservices architecture---to the rest of the world. The first iteration of Linkerd inherited a lot of Finagle’s view of the world, to the point where we were calling Linkerd a "RPC proxy" or a "dynamic linker for cloud-native apps." That made a lot of sense to us at Buoyant, but less so to the rest of the world. We still had a lot to learn about what solutions actually work for real world applications, and it was a little while before the "service mesh" model really caught on. - -The community that has grown around it since has been incredible. A tiny startup (at the time) called [Monzo](https://monzo.com/) was one of our earliest adopters. They’ve continued to stay involved in the Linkerd project while growing at an amazing pace! We’re proud to have played a small part in their journey and many others. Most importantly, the community around Linkerd has grown to include an impressive cast of contributors and adopters from around the world as a result. 
- -{{< fig - alt="happy birthday" - title="Happy birthday" - src="/uploads/2018/05/happy-birthday-linkerd-1024x550.jpg" >}} - -Fast forward two years and Linkerd is now powering the production architecture of massive companies with household names like Salesforce, PayPal, and Expedia. At Buoyant, we've been able to build a growing company around Linkerd that is singularly focused on service mesh technology and building products that help make the next generation of applications safe and reliable for everyone. (If you love what we're doing at Buoyant, check out our careers page because [we're hiring](https://buoyant.io/careers/)!) This project may have started within the walls of Buoyant, but the success of Linkerd is something we simply can’t achieve without the support of the vibrant community it’s created. - -{{< fig - alt="linkerd 2nd birthday collage" - src="/uploads/2018/05/collage-birthday.jpg" - title="Linkerd’s 2nd Birthday Party" >}} - -For the past two years, you’ve used Linkerd in anger and helped us develop solutions to the very real problems you were having. Together, we've seen Linkerd grow to serve over a trillion service requests. That's huge! That type of adoption was the first step necessary to helping Linkerd become less of a Buoyant-specific open source development effort, towards a more genuinely community-driven project. Contributing [Linkerd to the CNCF](https://buoyant.io/2017/01/23/linkerd-joins-the-cloud-native-computing-foundation/) last year was another step in that direction. Now with Linkerd’s second birthday, we’re taking yet another. - -Today we’re happy to announce that two of our most prolific and helpful community contributors--[Borys Pierov](https://github.com/Ashald) ([NCBI](https://www.ncbi.nlm.nih.gov/)) and [Zack Angelo](https://github.com/zackangelo) ([BigCommerce](https://www.bigcommerce.com))--are now also project maintainers for Linkerd. Zack and Borys have been an integral part of our community and we’re proud that they’ve chosen to double down on their awesomeness and involvement by sharing the responsibility for shaping and guiding the Linkerd project as a whole. Thank you! - -We're excited to cross this milestone in broadening what it means to be a part of the Linkerd community. There’s a lot more to come. Keep a look out for Linkerd 1.3.6 (which is just around the corner!). We look forward to continuing to work with you all as we charge into our third year together. - -Happy birthday, Linkerd! diff --git a/linkerd.io/content/blog/linkerd-joins-the-cloud-native-computing-foundation.md b/linkerd.io/content/blog/linkerd-joins-the-cloud-native-computing-foundation.md index 7aab595cce..bfc8ae1655 100644 --- a/linkerd.io/content/blog/linkerd-joins-the-cloud-native-computing-foundation.md +++ b/linkerd.io/content/blog/linkerd-joins-the-cloud-native-computing-foundation.md @@ -11,7 +11,15 @@ thumbnail: /uploads/linkerd_cncf_featured.png tags: [Buoyant, buoyant, Linkerd, linkerd, News] --- -_Cross-posted on the [Cloud Native Computing Foundation blog](https://www.cncf.io/blog/2017/01/23/linkerd-project-joins-cloud-native-computing-foundation)._ +_(This bit of Linkerd history appeared on 24 January 2017, cross-posted on +the [Cloud Native Computing Foundation +blog](https://www.cncf.io/blog/2017/01/23/linkerd-project-joins-cloud-native-computing-foundation). +Linkerd has evolved quite a lot since 2017: in particular, what we donated in +2017 was Linkerd 1, not the powerful, secure, Rust-based Linkerd 2 of today. 
+However, we'll always be proud to have been a part of the early history of the +CNCF, and grateful for the phenomenal support of our community. Thank you!)_ + +---- Today, the [Cloud Native Computing Foundation](http://cncf.io/)’s (CNCF) Technical Oversight Committee (TOC) voted to accept [Linkerd](https://linkerd.io/) as its fifth hosted project, alongside [Kubernetes](https://kubernetes.io/), [Prometheus](https://prometheus.io/), [OpenTracing](https://opentracing.io/) and [Fluentd](https://www.fluentd.org/). @@ -26,10 +34,9 @@ Linkerd is an open source, resilient service mesh for cloud-native applications. ## ENABLING RESILIENT AND RESPONSIVE MICROSERVICE ARCHITECTURES -Linkerd enables a consistent, uniform layer of visibility and control across services and adds features critical for reliability at scale, including latency-aware load balancing, connection pooling, automatic retries and circuit breaking. As a service mesh, Linkerd also provides transparent TLS encryption, distributed tracing and request-level routing. These features combine to make applications scalable, performant, and resilient. Linkerd integrates directly with orchestrated environments such as Kubernetes ([example]({{< ref -"a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}})) and DC/OS ([demo](https://www.youtube.com/watch?v=3fV7v1gyYms)), and supports a variety of service discovery systems such as ZooKeeper, Consul, and etcd. It features HTTP/2 and [gRPC](http://www.grpc.io/) support, and can provide metrics in [Prometheus](https://prometheus.io/) format. +Linkerd enables a consistent, uniform layer of visibility and control across services and adds features critical for reliability at scale, including latency-aware load balancing, connection pooling, automatic retries and circuit breaking. As a service mesh, Linkerd also provides transparent TLS encryption, distributed tracing and request-level routing. These features combine to make applications scalable, performant, and resilient. Linkerd integrates directly with orchestrated environments such as Kubernetes and DC/OS, and supports a variety of service discovery systems such as ZooKeeper, Consul, and `etcd`. It features HTTP/2 and [gRPC](http://www.grpc.io/) support, and can provide metrics in [Prometheus](https://prometheus.io/) format. -“The service mesh is becoming a critical part of building scalable, reliable cloud native applications,” said [William Morgan](https://twitter.com/wm), CEO of Buoyant and co-creator of Linkerd. “Our experience at Twitter showed that, in the face of unpredictable traffic, unreliable hardware, and a rapid pace of production iteration, uptime and site reliability for large microservice applications is a function of how the services that comprise that application communicate. Linkerd allows operators to manage that communication at scale, improving application reliability without tying it to a particular set of libraries or implementations. +“The service mesh is becoming a critical part of building scalable, reliable cloud native applications,” said [William Morgan](https://twitter.com/wm), CEO of Buoyant and co-creator of Linkerd. “Our experience at Twitter showed that, in the face of unpredictable traffic, unreliable hardware, and a rapid pace of production iteration, uptime and site reliability for large microservice applications is a function of how the services that comprise that application communicate. 
Linkerd allows operators to manage that communication at scale, improving application reliability without tying it to a particular set of libraries or implementations." Companies and organizations around the world use Linkerd in production to power their software infrastructure, including [Monzo](https://monzo.com/), [Zooz](https://zooz.com/), [ForeSee](https://foresee.com/), [Olark](https://olark.com/), [Houghton Mifflin Harcourt](https://hmhco.com/), the [National Center for Biotechnology Information](https://www.ncbi.nlm.nih.gov/), and [Douban](https://www.douban.com/). Linkerd is featured as a default part of cloud-native distributions such as Apprenda’s [Kismatic Enterprise Toolkit](https://github.com/apprenda/kismatic) and StackPointCloud. diff --git a/linkerd.io/content/blog/linkerd-on-dcos-for-service-discovery-and-visibility.md b/linkerd.io/content/blog/linkerd-on-dcos-for-service-discovery-and-visibility.md deleted file mode 100644 index 0f546fe266..0000000000 --- a/linkerd.io/content/blog/linkerd-on-dcos-for-service-discovery-and-visibility.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -slug: 'linkerd-on-dcos-for-service-discovery-and-visibility' -title: 'Linkerd on DC/OS for Service Discovery and Visibility' -aliases: - - /2016/10/10/linkerd-on-dcos-for-service-discovery-and-visibility/ -author: 'andrew' -date: Mon, 10 Oct 2016 22:45:50 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_featured_DCOS.png -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -In our previous post, [Linkerd as a service mesh for Kubernetes][part-i], we -showed you how to use Linkerd on Kubernetes for drop-in service discovery and -monitoring. In this post, we’ll show you how to get the same features -on [DC/OS](https://dcos.io/), and discuss how this compares with DNS-based -solutions like Mesos-DNS. - -When building applications in a scheduled environment like DC/OS, one of the -first questions you’ll face is how to do service discovery. Similar to -Kubernetes, DC/OS -provides several service discovery options out of the box, -including at least one DNS-based option. But what exactly is service discovery, -and how is it different from DNS? - -Service discovery is how your applications and services find each other. Given -the name of a service, service discovery tells you where that service is: on -what IP/port pairs are its instances running? Service discovery is an essential -component of multi-service applications because it allows services to refer to -each other by name, independent of where they’re deployed. Service discovery is -doubly critical in scheduled environments like DC/OS because service instances -can be rescheduled, added, or removed at any point, so where a service is is -constantly changing. - -## Why not DNS? - -An analogous system to service discovery is DNS. DNS was designed to answer a -similar question: given the hostname of a machine, e.g. `buoyant.io`, what is -the IP address of that host? In fact, DNS can be used as a basic form of service -discovery, and DC/OS ships -with Mesos-DNS out -of the box. - -Although DNS is widely supported and easy to get started with, in practice, it -is difficult to use DNS for service discovery at scale. First, DNS is primarily -used to locate services with “well-known” ports, e.g. port 80 for web servers, -and extending it to handle arbitrary ports is difficult (while SRV records exist -for this purpose, library support for them is spotty). 
Second, DNS information -is often aggressively cached at various layers in the system (the operating -system, the JVM, etc.), and this caching can result in stale data when used in -highly dynamic systems like DC/OS. - -As a result, most systems that operate in scheduled environments rely on a -dedicated service discovery system such as ZooKeeper, Consul, or etcd. -Fortunately, on DC/OS, Marathon itself can act as a source of service discovery -information, eliminating much of the need to run one of these separate systems, -at least, if you have a good way of connecting your application to Marathon. -Enter Linkerd! - -## Using Linkerd for service discovery - -[Linkerd](https://linkerd.io/) is a service mesh for cloud-native applications. -It provides a baseline layer of reliability for service-to-service -communication that’s transparent to the application itself. One aspect of this -reliability is service discovery. - -For DC/OS users, -the [Linkerd Universe package](https://github.com/mesosphere/universe/tree/version-3.x/repo/packages/L/linkerd/6) -is configured out of the box to do service discovery directly from Marathon. -This means that applications and services can refer to each other by their -Marathon task name. For example, a connection to `http://myservice` made via -Linkerd will be sent to an instance of the Marathon application myservice, -independent of DNS. Furthermore, Linkerd will intelligently load-balance across -all instances of myservice, keeping up-to-date as Marathon adds or removes -service instances. - -The DC/OS Linkerd Universe package installs a Linkerd instance on each node in -the cluster and configures it to act as an HTTP proxy. This means that most HTTP -applications can use Linkerd simply by setting the `http_proxy` environment -variable to localhost:, without code changes. (For non-HTTP applications, or -situations where setting this environment variable is not viable, Linkerd can -still be used -with [a little more configuration](https://api.linkerd.io/latest/linkerd/index.html).) - -Let’s walk through a quick demonstration of installing Linkerd and using it for -service discovery. After this step, we’ll also show you how, once it’s -installed, you can also easily use Linkerd to capture and display top-line -service metrics like success rates and request latencies. - -## Installing Linkerd - -### STEP 0: PREREQUISITES - -You will need: - A running DC/OS cluster. - -The DC/OS CLI installed. - -### STEP 1: DEPLOY A SAMPLE APPLICATION - -First, we’ll deploy a simple example application. Use -the [webapp.json][webapp.json] example application (borrowed from -this [Marathon guide](https://mesosphere.github.io/marathon/docs/native-docker.html)) -from the DC/OS CLI as follows: - -```bash -dcos marathon app add https://raw.githubusercontent.com/linkerd/linkerd-examples/master/dcos/webapp.json -``` - -### STEP 2: INSTALL THE LINKERD UNIVERSE PACKAGE - -We now have a working web server, though we have no clear way to discover or -route to it. Let’s fix that by installing Linkerd. The only configuration we -need at this point is the total number of nodes in the cluster. Use that number -to run the following command: - -```bash -dcos package install --options=<(echo '{"linkerd":{"instances":}}') linkerd -``` - -Where `` is the number of nodes in your DC/OS cluster. Voila! -Linkerd is now running on every node in your DC/OS cluster and is ready to route -requests by Marathon task name.
To make sure everything is working, run this -command, replacing `` with the address of a public node in your DC/OS -cluster. - -```bash -$ http_proxy=:4140 curl -s http://webapp/hello -Hello world -``` - -We’ve now routed a simple HTTP request to the Hello World app by using its -Marathon task name. This works on all DC/OS nodes, whether public or private. In -other words, all HTTP applications can now discover and route to each other by -Marathon task name by using Linkerd as an HTTP proxy. - -### STEP 3: INSTALL THE LINKERD-VIZ UNIVERSE PACKAGE - -Now that we have a sample application and a means to discover and route to it, -let’s take a look at how it’s performing! Is it receiving requests? Is it -producing successful responses? Is it responding fast enough? As a service mesh, -Linkerd understands enough about the service topology and requests to keep -accurate, live statistics to answer these questions. We’ll start by installing -the [linkerd-viz](https://github.com/linkerd/linkerd-viz) Universe package: - -```bash -dcos package install linkerd-viz -``` - -This package will install a basic dashboard. Let’s take a peek: - -```bash -open $(dcos config show core.dcos_url)/service/linkerd-viz -``` - -{{< fig - alt="linkerd dcos" - title="linkerd dcos" - src="/uploads/2017/07/buoyant-linkerd-viz-dcos.png" >}} - -You should see a dashboard of all your running services and selectors by service -and instance. The dashboard includes three sections: - -- **TOP LINE** Cluster-wide success rate and request volume. -- **SERVICE METRICS** One section for each application deployed. Includes - success rate, request volume, and latency. -- **PER-INSTANCE METRICS** Success rate, request volume, and latency for each - node in your cluster. - -Great! Now let’s add some load to the system and make our dashboard a bit more -interesting: - -```bash -export http_proxy=:4140 -while true; do curl -so /dev/null webapp; done -``` - -{{< fig - alt="linkerd viz" - title="linkerd viz" - src="/uploads/2017/07/buoyant-linkerd-viz-dcos-load.png" >}} - -Note how the dashboard updates automatically to capture this traffic and the -behavior of the systems—all without configuration on your end. So there you have -it. With just three simple commands, we were able to install Linkerd on our -DC/OS cluster, install an app, use Linkerd for service discovery, and get -instant visibility into the health of all our services. - -## Next Steps - -In the examples above, we’ve used Linkerd to talk to Marathon. But Linkerd has a -powerful routing language that allows you to use multiple forms of service -discovery simultaneously, to express precedence or failover rules between them, -and to migrate traffic from one system to another. All, of course, without the -application having to be aware of what’s happening. - -Even better, Linkerd is already providing us with much more than visibility and -service discovery. By using Linkerd as a service mesh, we’ve actually enabled -latency-aware load balancing, automatic retries and circuit breaking, -distributed tracing, and more. - -To read more about these features and how to take advantage of them in your -application, take a look at the -comprehensive [Linkerd documentation](https://linkerd.io/documentation/). - -Linkerd also has a thriving community of users and developers. 
If you get stuck, -need help, or have questions, feel free to reach out via one of the following -channels: - -- The [Linkerd Slack](http://slack.linkerd.io/) -- The [Linkerd Support Forum](https://linkerd.buoyant.io/) -- Email us directly at support@buoyant.io - -## Acknowledgments - -This post was co-authored -with [Ravi Yadav](https://twitter.com/RaaveYadav) from [Mesosphere](https://d2iq.com/solutions/mesosphere). -Thanks -to [Alex Leong](https://twitter.com/adlleong) and [Oliver Gould](https://twitter.com/olix0r) for -feedback on earlier drafts of this post. - -[webapp.json]: -  https://raw.githubusercontent.com/linkerd/linkerd-examples/master/dcos/webapp.json - -[part-i]: -{{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} diff --git a/linkerd.io/content/blog/linkerd-one-hundred-billion-production-requests.md b/linkerd.io/content/blog/linkerd-one-hundred-billion-production-requests.md deleted file mode 100644 index 063b58fd0c..0000000000 --- a/linkerd.io/content/blog/linkerd-one-hundred-billion-production-requests.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: 'linkerd-one-hundred-billion-production-requests' -title: 'Announcing one hundred billion production requests!' -aliases: - - /2017/03/07/linkerd-one-hundred-billion-production-requests/ -author: 'william' -date: Wed, 08 Mar 2017 00:25:37 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_hundred_billion_featured.png -tags: [Buoyant, buoyant, linkerd, News] ---- - -We’re happy to announce that, one year after version 0.1.0 was released, [Linkerd](https://linkerd.io/) has processed over **100 billion production requests** in companies around the world. Happy birthday, Linkerd! Let’s take a look at all that we’ve accomplished over the past year. - -We [released Linkerd into the wild in February 2016]({{< ref -"linkerd-twitter-style-operability-for-microservices" >}}), with nothing more than a couple commits, a few early contributors, and some very big dreams. Fast-forward by one year, and Linkerd has already grown to [30+ releases](https://github.com/linkerd/linkerd/releases), [800+ commits](https://github.com/linkerd/linkerd/commits/master), 1500+ stars, [30+ contributors](https://github.com/linkerd/linkerd/graphs/contributors), 600+ people in the [Linkerd Slack](https://slack.linkerd.io/), and 30-odd companies around the globe using it in production (or on the path to production)—including folks like [Monzo](https://monzo.com/), [Zooz](https://zooz.com/), NextVR, [Houghton Mifflin Harcourt](https://hmhco.com/), [Olark](https://olark.com/) and [Douban](https://douban.com/). - -Not to mention, of course, that [Linkerd is now officially a CNCF project](https://www.cncf.io/blog/2017/01/23/linkerd-project-joins-cloud-native-computing-foundation), alongside [Kubernetes](https://kubernetes.io/), [Prometheus](https://prometheus.io/), [gRPC](http://www.grpc.io/), and a couple of other amazing projects that are defining the very landscape of cloud native infrastructure. - -To the many contributors, users, and community members—thank you for helping us make Linkerd so successful this past year. (And thank you for privately sharing your production request volumes and deployment dates, which allow us to make big claims like the one above!) We couldn’t have asked for a better community.
We’d especially like to thank [Oliver Beattie](https://github.com/obeattie), [Jonathan Bennet](https://github.com/JonathanBennett), [Abdel Dridi](https://github.com/halve), [Borys Pierov](https://github.com/Ashald), [Fanta Gizaw](https://github.com/fantayeneh), [Leo Liang](https://github.com/leozc), [Mark Eijsermans](https://github.com/markeijsermans), [Don Petersen](https://github.com/dpetersen), and [Oleksandr Berezianskyi](https://github.com/OleksandrBerezianskyi) for their contributions to the project and the community. - -Finally, here’s a fun vanity metric graph, courtesy of [Tim Qian’s excellent Github star history plotter](https://github.com/timqian/star-history): - -{{< fig - alt="star history" - title="Star history" - src="/uploads/2017/07/buoyant-linkerd-github-star-history.png" >}} - -Here’s to another great year for Linkerd! - -— [William](https://twitter.com/wm), [Oliver](https://twitter.com/olix0r), and the gang at [Buoyant](https://buoyant.io/). diff --git a/linkerd.io/content/blog/linkerd-twitter-style-operability-for-microservices.md b/linkerd.io/content/blog/linkerd-twitter-style-operability-for-microservices.md deleted file mode 100644 index 1104dbaf61..0000000000 --- a/linkerd.io/content/blog/linkerd-twitter-style-operability-for-microservices.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -slug: 'linkerd-twitter-style-operability-for-microservices' -title: 'Linkerd: Twitter-style Operability for Microservices' -aliases: - - /2016/02/18/linkerd-twitter-style-operability-for-microservices/ -author: 'william' -thumbnail: /uploads/linkerd_featured_operability.png -date: Thu, 18 Feb 2016 22:51:16 +0000 -draft: false -featured: false -tags: [Buoyant, buoyant, Linkerd, linkerd, News] ---- - -How do you operate modern, cloud-native applications at scale? What problems arise in practice, and how are they addressed? What is *actually* required to run a cloud-native, microservices-based application under high-volume and unpredictable workloads, without introducing friction to feature releases or product changes? - -For all the talk about microservices, it turns out that very few people can actually answer these questions. The rapid rise of exciting new technologies like Docker, Mesos, Kubernetes, and gRPC easily makes armchair architects of us all. But actual high-traffic, production usage? By our reckoning, the number of companies that have actually solved the problems of running microservices at scale is a handful at best. - -Twitter is one of those companies. And while it’s certainly had its share of public outages, it operates one of the [highest-scale microservice applications in the world](https://blog.twitter.com/2013/new-tweets-per-second-record-and-how), comprising hundreds of services, tens of thousands of nodes, and millions of RPS per service. Shockingly enough, it turns out that this is [not easy to do](https://www.slideshare.net/InfoQ/decomposing-twitter-adventures-in-serviceoriented-architecture). The problems that arise are [not obvious](https://web.archive.org/web/20181205153929/https://www.somethingsimilar.com/2013/01/14/notes-on-distributed-systems-for-young-bloods/). The failure modes are [surprising](http://roc.cs.berkeley.edu/papers/dsconfig.pdf), [hard to predict](http://web.archive.org/web/20141009231131/http://www.ctlab.org/documents/How%20Complex%20Systems%20Fail.pdf), and sometimes even [hard to describe](https://blog.twitter.com/2012/today-s-turbulence-explained). 
It can be done, but it takes [years of thought and work](https://monkey.org/~marius/redux.html) to make everything work well in practice. - -When Oliver and I left Twitter in the not-too-distant past, our goal was to take these years of operational knowledge and turn them into something that the rest of the world could use. Happily, a tremendous amount of that knowledge was already encoded in an open-source project called [Finagle](http://finagle.github.io/), the high-throughput RPC library that powers Twitter’s microservice architecture. - -Finagle is Twitter’s core library for managing the communication between services. Practically every online service at Twitter is built on Finagle, and it powers millions upon millions of RPC calls every second. And it’s not just Twitter—Finagle powers the infrastructure at [Pinterest](https://www.pinterest.com/), [SoundCloud](https://soundcloud.com/), [Strava](https://www.strava.com/), [StumbleUpon](http://www.stumbleupon.com/), and [many other companies](https://github.com/twitter/finagle/blob/master/ADOPTERS.md). - -Today, we’re happy to announce a small step towards our vision of making Finagle usable by the masses. **[Linkerd](http://linkerd.io/)** has hit 0.1.0, and we’re open-sourcing it under the [Apache License v2](http://www.apache.org/licenses/LICENSE-2.0). - -{{< fig - alt="logo" - title="logo" - src="/uploads/2017/07/buoyant-linkerd-logo.png" >}} - -**Linkerd** is our open-source *service mesh* for cloud-native applications. It’s built directly on Finagle, and is designed to give you all the operational benefits of Twitter’s microservice-based, orchestrated architecture—those many lessons learned over many years—in a way that’s self-contained, has minimal dependencies, and can be dropped into existing applications with a minimum of change. - -If you’re building a microservice and want to take advantage of the benefits of Finagle—including [intelligent, adaptive load balancing](https://linkerd.io/features/load-balancing/), [abstractions over service discovery](https://linkerd.io/features/service-discovery/), and [intra-service traffic routing](https://linkerd.io/features/routing/)—you can use Linkerd to add these features without having to change your application code. Plus, fancy dashboards! - -{{< fig - alt="linkerd dashboard" - title="linkerd dashboard" - src="/uploads/2017/07/buoyant-linkerd-dashboard.png" >}} - -Linkerd isn’t complete yet, but in the spirit of “release early and release often”, we think it’s time to get this baby out to the wild. - -So if this piques your interest, start with [linkerd.io](https://linkerd.io/) for docs and downloads. And if you’re interested in contributing, head straight to the [Linkerd Github repo](https://github.com/linkerd/linkerd). We’re strong believers in open source—Finagle itself has been open source since almost the beginning—and we’re excited to build a community around this. - -We have a long roadmap ahead of us, and a huge list of exciting features we’re looking forward to adding to Linkerd. [Come join us](https://slack.linkerd.io/)! - -—[William](https://twitter.com/wm), [Oliver](https://twitter.com/olix0r), and the [whole team at Buoyant](https://buoyant.io/). - -(If you’re wondering about the name: we like to think of Linkerd as a “dynamic linker” for cloud-native apps. 
Just as the dynamic linker in an OS takes the name of a library and a function, and does the work necessary to *invoke* that function, so too Linkerd takes the name of a service and an endpoint, and does the work necessary to make that call happen—safely, securely and reliably. See [Marius’s talk at FinagleCon](https://monkey.org/~marius/redux.html) for more about this model.) diff --git a/linkerd.io/content/blog/making-microservices-more-resilient-with-circuit-breaking.md b/linkerd.io/content/blog/making-microservices-more-resilient-with-circuit-breaking.md deleted file mode 100644 index 5a30b5bb76..0000000000 --- a/linkerd.io/content/blog/making-microservices-more-resilient-with-circuit-breaking.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -slug: 'making-microservices-more-resilient-with-circuit-breaking' -title: 'Making microservices more resilient with circuit breaking' -aliases: - - /2017/01/13/making-microservices-more-resilient-with-circuit-breaking/ -author: 'kevin' -date: Sat, 14 Jan 2017 00:17:03 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_circuit_breaking_featured.png -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -One of the inevitabilities of moving to a microservices architecture is that you’ll start to encounter *partial failures*—failures of one or more instances of a service. These partial failures can quickly escalate to full-blown production outages. In this post, we’ll show how circuit breaking can be used to mitigate this type of failure, and we’ll give some example circuit breaking strategies and show how they affect success rate. - -In previous posts, we’ve addressed the [crucial role of load balancing]({{< -relref "beyond-round-robin-load-balancing-for-latency" >}}) in scaling microservice applications. Given a service with many instances, requests to that service must be balanced over all of the instances. This balancing can be done at the connection level, e.g. with systems like kubeproxy (Kubernetes) or Minuteman (DC/OS); or it can be done at the request level, with systems like haproxy or with [Linkerd, our service mesh for cloud native applications](https://linkerd.io/). - -Request-level load balancing requires protocol-specific code. A system that balances HTTP traffic cannot be used to balance requests to, say, memcache, without adding memcache support. However, request balancing also allows for powerful techniques that can dramatically improve system resilience. For example, Linkerd does [latency-aware load balancing](https://linkerd.io/features/load-balancing/), which allows it to reduce end-to-end latency by automatically shifting traffic away from slow instances. This can make a tremendous difference in overall application performance. - -Hence, request-level load balancing protects our requests from the degradation that is caused by a slow backend instance. But what happens when an instance isn’t slow, but is failing requests? If a given instance is returning errors, but doing so rapidly, latency-aware load balancing might actually send it more traffic! These failing requests may later be retried against other instances, but this is clearly not a good situation. - -## Circuit breaking - -Enter [_circuit breaking_](https://martinfowler.com/bliki/CircuitBreaker.html). As the name suggests, circuit breaking will shut off traffic to an individual instance if it fails requests too frequently. 
For example, if an instance starts failing 50% of requests, circuit breaking could prevent further requests from hitting this instance in the first place. By removing failing instances from the load balancer pool, circuit breaking can not only improve overall success rate, but also reduce latency by reducing the rate of retries. - -Even better, circuit breaking can provide powerful benefits to failure *recovery*. Since partial failures are often tied to high request load, a tripped circuit breaker can give failing instances a chance to recover by reducing the overall amount of traffic that they are receiving. In the event of a full-service outage, circuit breaking can even help protect against resource depletion, a situation in which multiple callers are stuck waiting on responses from a failing service. Resource depletion is a common cause of cascading failures within distributed systems. - -Implementing circuit breaking in your own system can be tricky, however, especially if there are multiple frameworks and languages in play. When should the breaker be tripped? When should it be reset? Should we make the failure determination by number of failures, by a rate, or by a ratio? How do we know when an instance is healthy if it’s not receiving traffic? These details, and the complex interplay between circuit breaking, load balancing, and retry strategies, can be quite difficult to get right in practice. - -Fortunately, [Linkerd](https://linkerd.io/) makes available the battle-hardened, incredibly well-tested circuit breaking code that’s built into [Finagle](http://twitter.github.io/finagle/guide/). Like many of Linkerd’s reliability features, this code is used at scale every day at companies like Twitter, Pinterest, and SoundCloud. And as of the [Linkerd 0.8.5 release](https://github.com/linkerd/linkerd/releases/tag/0.8.5), we are happy to report that Linkerd’s circuit breaking is [now configurable][configurable] by changing the way Linkerd does “failure accrual”—the measure of instance health that determines whether a circuit breaker is tripped. This means that you can tune Linkerd’s circuit breaking for the specific failure patterns you want to prevent. - -In the rest of this post, we’ll explore the impact of various failure accrual settings on success rate, and we’ll demonstrate a specific case where choosing the right circuit breaking strategy immediately improves site success rate in the presence of partial failures. - -### Setup - -To test various circuit breaking settings, we’ve set up a failure-accrual demo project in the [linkerd-examples repo](https://github.com/linkerd/linkerd-examples). The [project’s README][readme] contains a lot of helpful information about how the demo is constructed, but suffice it to say that you can run it locally by cloning the repo and spinning up all of the services using docker-compose. Like so: - -```bash -git clone https://github.com/linkerd/linkerd-examples.git -cd linkerd-examples/failure-accrual -docker-compose build && docker-compose up -d -``` - -The demo runs Linkerd configured with multiple routers, each using a different failure accrual configuration, as well as a Grafana dashboard to compare router performance. To view the dashboard, go to port 3000 on your Docker host. It will look like this: - -{{< fig -  alt="Dashboard" -  title="Dashboard" -  src="/uploads/2017/07/buoyant-circuit-breaking-dashboard-1024x689.png" >}} - -Armed with this dashboard, we’re now ready to start our comparison of different circuit breaking configurations.
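If the dashboard does not load, a quick sanity check is to confirm that the demo containers actually came up. These are generic Docker Compose commands run from the `linkerd-examples/failure-accrual` directory used above, and they assume Docker is running locally rather than on a remote host:

```bash
# Every service defined by the demo's docker-compose.yml should report an "Up" state:
docker-compose ps

# The Grafana dashboard is served on port 3000 of the Docker host;
# expect an HTTP 2xx or 3xx status line here:
curl -sI http://localhost:3000 | head -n 1
```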
- -### Circuit breaking configurations - -Linkerd’s failure accrual policy consists of two parts, as follows: - -- **Failure threshold**: This is the part of the policy that Linkerd uses to determine whether or not it should remove an instance from the load balancer pool. Once an instance has been removed, Linkerd will periodically send it test requests (called probes). If the probe succeeds, the instance will be added back into the load balancer pool. There are multiple different types of thresholds available, and those are covered in the rest of this section. -- **Backoff interval**: This is the part of the policy that Linkerd uses to determine how often it should probe removed instances to determine if they have recovered. There are two types of backoff intervals: constant and jittered. A constant backoff instructs Linkerd to wait a fixed amount of time between each probe. A jittered backoff is configured with a minimum and maximum wait time. Linkerd will send its first probe after the minimum wait time, but if the probe fails, it will increase its wait time before sending the next probe, until the maximum wait time is reached. - -Linkerd’s default policy uses a threshold of 5 consecutive failures, with a jittered backoff interval between 5 seconds and 5 minutes. - -In the experiments below, we will vary the type of failure threshold used in each configuration. All configurations use the same constant backoff interval for the sake of comparison. We will also disable retries to better show the impact of changing failure accrual policy. In practice, it is likely that you want to enable retries. - -#### NO CIRCUIT BREAKING - -Let’s start by looking at the router that’s configured to disable circuit breaking altogether, with the following configuration: - -```yml -routers: - - protocol: http - client: - failureAccrual: - kind: none -``` - -As you would expect, under this configuration, Linkerd *never* removes unhealthy instances from the load balancer pool. In our demo setup, where one of the 5 backend instances has a 60% success rate, this yields the following success rate and distribution of requests across backend instances: - -{{< fig - alt="Dashboard" - title="Dashboard" - src="/uploads/2017/07/buoyant-circuit-breaking-none-1024x299.png" >}} - -You can see that the failing instance is also serving fewer requests, since our demo setup adds latency to failing requests and Linkerd is load balancing requests based on observed queue sizes. It’s worth noting that the overall success rate would be even worse with a naive load balancing algorithm, such as round robin, where all instances receive the same number of requests, regardless of performance. In that case we’d expect to see overall success rate hovering around 92%, with the unhealthy backend failing 2 out of every 25 requests to the cluster. - -#### CONSECUTIVE FAILURES - -Next, let’s look at the router with Linkerd’s default circuit breaking configuration, which has a failure threshold of 5 consecutive failures: - -```yml -routers: - - protocol: http - client: - failureAccrual: - kind: io.l5d.consecutiveFailures - failures: 5 - backoff: - kind: constant - ms: 10000 -``` - -This configuration tells Linkerd to remove any instance that has failed 5 requests in a row. 
In our demo setup it yields the following success rate and distribution of requests across backend instances: - -{{< fig - alt="Dashboard" - title="Dashboard" - src="/uploads/2017/07/consecutive-failures.png" >}} - -This configuration, with Linkerd’s default circuit breaking, shows a marked improvement in overall success rate compared to the previous configuration with no circuit breaking—from around 95% up to 97%. But it’s not perfect. At a 60% success rate for our one failing instance, there’s a very low probability that the unhealthy instance emits 5 consecutive failures and trips the circuit breaker. Thus, the failing instance remains in the load balancer pool for large portions of the run, as illustrated in the requests per instance graph above. Removing this instance from the pool more quickly would improve the overall success rate. - -#### SUCCESS RATE - -While the default configuration above is clearly an improvement, we can further improve the effectiveness of our failure threshold since we know the approximate success rate and volume of requests to the failing backend. In the final configuration, we use a failure threshold based on a target success rate over a bounded number of requests, with the following configuration: - -```yml -- protocol: http - client: - failureAccrual: - kind: io.l5d.successRate - successRate: 0.9 - requests: 20 - backoff: - kind: constant - ms: 10000 -``` - -This configuration tells Linkerd to remove any instance that has a success rate below 90%, computed over its 20 most recently completed requests. In our demo setup it yields the following success rate and distribution of requests across backend instances: - -{{< fig - alt="Dashboard" - title="Dashboard" - src="/uploads/2017/07/buoyant-circuit-breaking-success-rate-1024x299.png" >}} - -This improves our success rate from 97% in the previous configuration to approximately 99%. You can also see that, in this configuration, the unhealthy instance reaches the configured failure threshold much more quickly than in the previous configuration, which results in it being removed from the load balancer pool much more rapidly. - -### Discussion - -We have shown that picking the right circuit breaking settings can have a dramatic impact on overall success rate. Let’s put all three success rates side-by-side, using a 90-second moving average to eliminate the spikes that come from adding and removing instances in quick succession: - -{{< fig - alt="Dashboard" - title="Dashboard" - src="/uploads/2017/07/buoyant-circuit-breaking-success-rates-1024x548.png" >}} - -As you can see, the windowed success-rate-based circuit breaking yielded the highest success rate, around 99%, while the configuration without circuit breaking was around a 95% success rate—a tremendous difference. Circuit breaking based on consecutive failures was in the middle, with a success rate of around 97% over the window measured. - -Of course, it’s important to recognize the tradeoffs that come with different failure accrual policies. Setting a failure threshold that is too low, and thus more likely to deem instances to be unhealthy faster, will improve success rate under certain conditions, but also runs the risk of shrinking the pool to an unacceptably small size. Conversely, setting a failure accrual threshold that is too high will result in fewer spurious removals, but it will also allow unhealthy instances to stay in the pool longer than they should. 
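If you want to watch these tradeoffs play out beyond the Grafana dashboard, Linkerd's admin endpoint also exposes per-client load balancer and failure accrual counters. A rough sketch (assuming the default admin port of 9990; exact stat names vary between Linkerd versions, so treat the grep patterns as illustrative):

```bash
# Dump Linkerd's runtime metrics and pick out load balancer / failure accrual stats.
curl -s http://localhost:9990/admin/metrics.json \
  | python -m json.tool \
  | grep -iE 'loadbalancer|failure_accrual|removals|probes'
```

Watching these counters while the demo runs is a quick way to confirm when an instance has actually been evicted from the pool, rather than inferring it from the success rate graphs.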
- -The two types of policies, success rate and consecutive failures, are aimed at different types of failure scenarios. The consecutive failures policy (which is really just a special case of the success rate policy with the success rate threshold set to 0%) is useful to quickly detect when a backend instance goes into a state of complete failure, and can be used with a small window, as we did above. By contrast, the success-rate-based policy is better for detecting instances that are only partly degraded, but would typically require a longer window size in order to avoid accidentally triggering the breaker. - -## Conclusion - -Circuit breaking is a powerful tool that can improve the resilience of your microservice applications in the face of partial failure. In Linkerd, it’s one of several such tools, alongside load balancing, [response classification](https://linkerd.io/config/0.8.5/linkerd/index.html#http-response-classifiers), and [retries](https://linkerd.io/config/0.8.5/linkerd/index.html#retries). - -We’ve demonstrated how a little thought in circuit breaking strategy can go a long way toward improving the overall performance of your application. If you’re interested in trying this on your own systems with Linkerd, be sure to check out the [full failure accrual API documentation][configurable] for more configuration options. As always, you can reach us in [the Linkerd Slack](https://slack.linkerd.io/) if you run into any issues. - -[configurable]: https://linkerd.io/config/0.8.5/linkerd/index.html#failure-accrual -[readme]: https://github.com/linkerd/linkerd-examples/tree/master/failure-accrual diff --git a/linkerd.io/content/blog/making-things-faster-by-adding-more-steps.md b/linkerd.io/content/blog/making-things-faster-by-adding-more-steps.md deleted file mode 100644 index 1896add031..0000000000 --- a/linkerd.io/content/blog/making-things-faster-by-adding-more-steps.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -slug: 'making-things-faster-by-adding-more-steps' -title: 'Making Things Faster by Adding More Steps' -aliases: - - /2017/01/31/making-things-faster-by-adding-more-steps/ -author: 'andrew' -date: Wed, 01 Feb 2017 00:22:35 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_faster_featured.png -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -The development of distributed systems is full of strange paradoxes. The reasoning we develop as engineers working on a single computer can break down in unexpected ways when applied to systems made of many computers. In this article, we’ll examine one such case—how the introduction of an additional network hop can actually decrease the end-to-end response time of a distributed system. - -Speed matters. Studies have shown that the introduction of [as little as 100ms of additional latency can have a marked impact on human behavior](https://research.googleblog.com/2009/06/speed-matters.html). Generally speaking, we’d expect the introduction of a new component in a request path to increase latency. - -In this light, we’re often asked about the latency impact of adding [Linkerd](https://linkerd.io/) to a system. The simple answer is “network time, plus about 1ms” (our goal is [<1ms p95 at 1000qps]({{< ref "beyond-round-robin-load-balancing-for-latency" >}})). The more complex answer is: in most situations, Linkerd actually *reduces* end-to-end latency of your system (and improves success rate). - -How is this magic possible? 
In short, Linkerd’s latency-aware load balancing can rapidly detect when instances are slow and shift request traffic away from them. By combining this behavior with [circuit breaking]({{< relref -"making-microservices-more-resilient-with-circuit-breaking" >}}) and [retries](https://linkerd.io/config/0.8.6/linkerd/index.html#http-response-classifiers), Linkerd can dramatically decrease end-to-end latency in a distributed system—despite being “yet another component”. - -Let’s take a look at a dramatic example of this behavior with some poorly-behaved services. - -## Quick Start - -First let’s boot up a test environment: - -```bash -git clone https://github.com/linkerd/linkerd-examples.git -cd linkerd-examples/add-steps -docker-compose build && docker-compose up -d -open http://$(docker-machine ip default):3000 # or equivalent docker ip address -``` - -If everything worked correctly, you should see this: - -{{< fig - alt="linkerd dashboard" - title="linkerd dashboard" - src="/uploads/2017/01/add-steps-dashboard-1024x701.png" >}} - -## What’s going on here? - -You have just booted up parallel test clusters, a Linkerd cluster, and a baseline cluster: - -{{< fig - alt="steps diagram" - title="steps diagram" - src="/uploads/2017/01/add-steps-diagram.png" >}} - -We are using our load tester, [slow_cooker](https://github.com/BuoyantIO/slow_cooker), to generate traffic. Each cluster consists of ten identical [Go servers][server] that serve a simple http endpoint. Instance response times vary between 0 and 2 seconds. The slowest instances also return an error response for a certain percentage of requests. - -Comparing these two clusters, we observe the following results: - -| | linkerd | baseline | -| ------------------- | ------- | -------- | -| p50 latency | 42ms | 86ms | -| p95 latency | 604ms | 1739ms | -| p99 latency | 734ms | 2100ms | -| requests per second | 316rps | 109rps | -| success rate | 100% | 88.08% | - -Note that p50 latency is more than twice as fast with Linkerd. p99 latency is 2.8x faster with Linkerd. Linkerd’s latency-aware load balancer is favoring the faster instances while the baseline cluster’s latency is severely degraded due to the slower instances. Success rate is 100% with Linkerd vs. 88.08% without, thanks to Linkerd’s automatic retry behavior. - -All of these metrics have improved despite adding an extra network hop via Linkerd. It’s clear that [round robin](/2017/02/01/making-things-faster-by-adding-more-steps/#1-a-note-about-round-robin) does not handle degraded hosts well. Host degradation like this is extremely common in the real world for many reasons, including failing hardware, network congestion, and noisy neighbors. These issues get even more common as you scale up. - -## Linkerd Tuning - -To achieve these results, we tuned Linkerd to favor fast and healthy instances. Here is the relevant snippet from our [Linkerd config](https://github.com/linkerd/linkerd-examples/blob/master/add-steps/linkerd.yml): - -```yml -responseClassifier: - kind: io.l5d.retryableRead5XX -client: - loadBalancer: - kind: ewma - failureAccrual: - kind: io.l5d.successRate - successRate: 0.9 - requests: 20 - backoff: - kind: constant - ms: 10000 -``` - -In the `responseClassifier` section, note the use of [io.l5d.retryableRead5XX](https://linkerd.io/config/0.8.6/linkerd/index.html#retryable-read-5xx). We have configured Linkerd to retry 500s. This enables Linkerd to achieve 100% success rate even when some instances are failing. 
Of course, retrying a failing request adds latency for that request, but this is often a good tradeoff to make, since the alternative is a failed request.

In the `loadBalancer` section, note the use of [EWMA](https://linkerd.io/config/0.8.6/linkerd/index.html#power-of-two-choices-peak-ewma). This algorithm computes an exponentially-weighted moving average over the latency of each instance, where recent latency observations are weighted more heavily. In our [testing]({{< relref "beyond-round-robin-load-balancing-for-latency" >}}), this load-balancing algorithm responds rapidly to changes in instance latency, allowing it to perform well when latency is inconsistent.

In the `failureAccrual` section, note the use of [io.l5d.successRate](https://linkerd.io/config/0.8.6/linkerd/index.html#success-rate). This also computes an exponentially-weighted moving average, this time based on the success rate of each instance. In our [testing]({{< relref "making-microservices-more-resilient-with-circuit-breaking" >}}), this failure accrual algorithm performs well when success rate is inconsistent.

For more information on configuration options available in Linkerd, have a look at our [Configuration Reference](https://api.linkerd.io/latest/linkerd/index.html).

## Conclusion

In this example, we've seen how Linkerd can improve system throughput in the presence of failing and slow components, even though Linkerd itself adds a small amount of latency to each request. In our [experience operating large-scale systems]({{< relref "linkerd-twitter-style-operability-for-microservices" >}}), this test environment demonstrates the types of performance issues and incidents that we have seen in production. A single request from the outside can hit 10s or even 100s of services, each having 10s or 100s of instances, any of which may be slow or down. Setting up Linkerd as your service mesh can help ensure latency stays low and success rate stays high in the face of inconsistent performance and partial failure in your distributed systems.

If you have any questions about this post, Linkerd, or distributed systems in general, feel free to stop by our [Linkerd community Slack](http://slack.linkerd.io/), post a topic on the [Linkerd Support Forum](https://linkerd.buoyant.io/), or [contact us directly](https://linkerd.io/overview/help/).

## Acknowledgements

Thanks to [Alex Leong](https://twitter.com/adlleong) and [Kevin Lingerfelt](https://twitter.com/klingerf) for feedback on earlier drafts of this post.

## \[1\] A Note About Round Robin

It was surprisingly difficult to find a DNS client that would correctly round robin IP addresses. You may have noticed that this demo uses a [slow_cooker built on golang 1.8rc2][add-steps]. This was required because [golang 1.7 does not do round robin correctly][round-robin]. We also found that [curl][curl], [ping][ping], and anything relying on [glibc's getaddrinfo][glibc] employ a sort function based on [rfc6724](https://tools.ietf.org/html/rfc6724#section-6). This results in a preference for certain IP addresses, or in some cases a single IP address.
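As an aside to this footnote: if you're curious how your own resolver behaves, a quick (if crude) check is to compare what the DNS server returns with what `getaddrinfo` actually hands to applications. The hostname below is a placeholder for any name with multiple A records:

```bash
# What the DNS server returns, in the order it serves the records:
dig +short myservice.example.com A

# What getaddrinfo gives applications after glibc's rfc6724-style sorting
# (each address appears once per socket type, so collapse adjacent duplicates):
getent ahosts myservice.example.com | awk '{print $1}' | uniq
```

If the second list always comes back in the same order regardless of what the server returns, your clients are sorting addresses rather than round-robining them.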
- -[server]: https://github.com/linkerd/linkerd-examples/blob/master/add-steps/server.go -[add-steps]: https://github.com/linkerd/linkerd-examples/blob/master/add-steps/docker-compose.yml#L154 -[round-robin]: https://github.com/golang/go/commit/116da1c64a2db0387f38f8d062378b62bf0f377e -[curl]: https://github.com/curl/curl/blob/6e12bb7cbeda99be8a9c2aace18180784b6b7e26/lib/curl_addrinfo.c#L124 -[ping]: https://github.com/iputils/iputils/blob/f7710a17c4d5994313a64583f511bcdb9559f2a9/ping.c#L519 -[glibc]: https://github.com/lattera/glibc/blob/be971a2b1c7828d94c41edd5cd22c1d9dcef1b6e/sysdeps/posix/getaddrinfo.c#L1590 diff --git a/linkerd.io/content/blog/prometheus-the-right-way-lessons-learned-evolving-conduits-prometheus-integration.md b/linkerd.io/content/blog/prometheus-the-right-way-lessons-learned-evolving-conduits-prometheus-integration.md deleted file mode 100644 index 7bcfc842d8..0000000000 --- a/linkerd.io/content/blog/prometheus-the-right-way-lessons-learned-evolving-conduits-prometheus-integration.md +++ /dev/null @@ -1,154 +0,0 @@ ---- -slug: 'prometheus-the-right-way-lessons-learned-evolving-conduits-prometheus-integration' -title: "Prometheus the Right Way: Lessons learned evolving Conduit's Prometheus integration" -aliases: - - /2018/05/17/prometheus-the-right-way-lessons-learned-evolving-conduits-prometheus-integration/ -author: 'andrew' -date: Thu, 17 May 2018 18:21:51 +0000 -thumbnail: /uploads/prometheus-the-right-way.png -draft: false -featured: false -tags: [Conduit, Uncategorized] ---- - -Conduit is now part of Linkerd! [Read more >]({{< relref -"conduit-0-5-and-the-future" >}}) - -_This post was coauthored by_ [_Frederic Branczyk_](https://twitter.com/fredbrancz)_, a member of the Prometheus team._ - -[Conduit](https://conduit.io/) is an open source service mesh for Kubernetes. One of its features is a full telemetry pipeline, built on [Prometheus](https://prometheus.io/), that automatically captures service success rates, latency distributions, request volumes, and much more. - -In the 0.4.0 release, in collaboration with [Frederic Branczyk](https://twitter.com/fredbrancz), a member of the upstream Prometheus team, we rewrote this pipeline from the ground up. This post shares some of the lessons we learned about Prometheus along the way. - -## Telemetry pipeline: a first pass - -Conduit aims to provide top-line service metrics without requiring any configuration or code changes. No matter what your Kubernetes application does or what language it’s written in, Conduit should expose critical service metrics like success rates without any effort on your part. To do this, Conduit instruments all traffic as it passes through the Conduit proxies, aggregates this telemetry information, and reports the results. This is known as the “telemetry pipeline,” and Prometheus is a central component. - -When we released Conduit 0.2.0 in January, it included a first pass at user-visible telemetry. In this initial iteration, Conduit was extremely liberal in how it recorded metrics, storing histograms for every possible combination of request metadata (including paths, etc), and exposing this information at extremely small time granularities (down to the second). - -In order to do this without incurring latency or significant memory consumption in the proxy, we maintained a fixed-sized buffer of events, which the proxy periodically sent to the control plane to be processed. 
The control plane aggregated these events, using the Kubernetes API to discover interesting source and destination labels, and in turn exposed these detailed metrics to be scraped by Prometheus. Conduit’s control plane contained a dedicated service, called Telemetry, that exposed an API with distinct read and write paths. - -{{< fig - alt="Initial telemetry pipeline architecture" src="/uploads/2018/05/conduit-prom-1-1024x656-1024x656.png" - title="Initial telemetry pipeline architecture" >}} - -### Writes - -In this initial version, proxies pushed metrics via a [gRPC write interface][proto] provided by the Telemetry service. The overall telemetry flow was: - -1. The Conduit proxies (one in each Kubernetes Pod) push metrics to our Telemetry service via a gRPC interface. -2. The Telemetry service aggregates data from each proxy. -3. The Telemetry service exposes this aggregated data on a `/metrics` endpoint. -4. Prometheus collects from the Telemetry service’s `/metrics` endpoint. - -Here is just a small snippet of that gRPC write interface: - -```protobuf -message ReportRequest { - Process process = 1; - enum Proxy { - INBOUND = 0; - OUTBOUND = 1; - } - Proxy proxy = 2; - repeated ServerTransport server_transports = 3; - repeated ClientTransport client_transports = 4; - repeated RequestScope requests = 5; -} -message Process { - string node = 1; - string scheduled_instance = 2; - string scheduled_namespace = 3; -} -message ServerTransport { - common.IPAddress source_ip = 1; - uint32 connects = 2; - repeated TransportSummary disconnects = 3; - common.Protocol protocol = 4; -} -... -``` - -### Reads - -Similarly, the initial read path used a [gRPC read interface](https://github.com/runconduit/conduit/blob/v0.2.0/proto/controller/telemetry/telemetry.proto#L7-L35) for the Public API to query the Telemetry service, and followed a comparable flow: - -1. Public API service queries Telemetry for metrics via gRPC. -2. Telemetry service queries Prometheus. -3. Telemetry service repackages the data from Prometheus. -4. Telemetry service returns repackaged data to the Public API. - -## A collaboration - -When we announced the Conduit 0.2.0 release on Twitter, it resulted in this seemingly innocuous reply: {{< tweet 959111860871225344 >}} Frederic helped us identify a number of issues in the telemetry pipeline we had designed: - -- The push model required the Telemetry service to hold and aggregate a lot of state that was already present in all the proxies. -- Recency of data was inconsistent due to timing difference between proxy push intervals and Prometheus collection intervals. -- Though the Telemetry service appeared as a single collection target to Prometheus, we were essentially simulating a group of proxies by overloading metric labels. -- It was challenging to iterate, modify, and add new metric types. The read and write gRPC interfaces were acting as inflexible wrappers around an established Prometheus metrics format and query language. - -In addition, Conduit had re-implemented a lot of functionality that was already provided by Prometheus, and didn’t take advantage of some functionality it should have: - -- Prometheus operates with pull model. -- Prometheus already has excellent [Kubernetes service discovery support](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#%3Ckubernetes_sd_config%3E). -- Prometheus has a flexible and powerful [query language](https://prometheus.io/docs/prometheus/latest/querying/basics/). 
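To make that last point concrete: the kind of per-service success rate Conduit needed is a one-liner in PromQL. The query below is only a sketch against Prometheus' standard HTTP API; the metric and label names are illustrative rather than Conduit's exact schema.

```bash
# Ask Prometheus for a per-deployment success rate over the last minute.
# (response_total / classification / deployment are hypothetical names here.)
curl -sG 'http://prometheus.example.com:9090/api/v1/query' \
  --data-urlencode 'query=sum(rate(response_total{classification="success"}[1m])) by (deployment) / sum(rate(response_total[1m])) by (deployment)'
```

Anything a dedicated aggregation service would have computed for us can instead be expressed as a query like this at read time.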
## Telemetry pipeline: doin' it right

With Frederic's pointers in mind, we set about stripping away any functionality in Conduit that could be offloaded onto Prometheus. The most obvious component for removal was the Telemetry service itself, which was aggregating data across proxies on the write side, and serving metrics queries on the read side: two things Prometheus could already do itself.

Removal of the Telemetry service meant that the proxies needed to serve a `/metrics` endpoint that Prometheus could collect from. To populate this, we developed a [metrics recording and serving module](https://github.com/runconduit/conduit/tree/86bb701be8ce5904334a29452fca25d0f507f6dc/proxy/src/telemetry/metrics) specific to our Rust proxy. On the read side, we then needed to wire up the Public API directly to Prometheus. Fortunately, much of that integration code already existed in the Telemetry service, so we simply moved that Prometheus client code into the Public API.

These changes allowed us to delete our gRPC read and write APIs, and yielded a much simpler and more flexible architecture:

{{< fig
  alt="Conduit Telemetry The Right Way" src="/uploads/2018/05/conduit-prom-2-1024x509-1024x509.png"
  title="Updated telemetry pipeline architecture" >}}

The new telemetry pipelines were significantly easier to reason about:

### Write pipeline

- Rust proxies serve a `/metrics` endpoint.
- Prometheus pulls from each proxy, discovered via Kubernetes service discovery.

### Read pipeline

- Public API queries Prometheus for metrics.

We released the redesigned metrics pipeline with Conduit 0.4.0, and there was much rejoicing.

{{< tweet 986115332657045505 >}}

### Plus some cool new features

The new telemetry pipeline also unlocked a number of notably cool new features, including advancements to the `conduit stat` command and an easy Grafana integration:

```bash
$ conduit -n emojivoto stat deploy
NAME MESHED SUCCESS RPS LATENCY_P50 LATENCY_P95 LATENCY_P99
emoji 1/1 100.00% 2.0rps 5ms 10ms 10ms
vote-bot 1/1 - - - - -
voting 1/1 87.72% 0.9rps 6ms 16ms 19ms
web 1/1 93.91% 1.9rps 8ms 41ms 48ms
```

{{< fig
  alt="Conduit Grafana Dashboard"
  src="/uploads/2018/05/conduit-grafana-1-1024x556-1024x556.png"
  title="Conduit Grafana dashboard" >}}

## Looking ahead

Of course, there's still a lot more work to do on the Conduit service mesh and its telemetry system, including:

- Leveraging [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) for more complete Kubernetes resource information.
- Leveraging [Kubernetes Mixins](https://github.com/kubernetes-monitoring/kubernetes-mixin) for more flexible generation of Grafana dashboards.
- Immediate expiration of metrics in the proxy when Kubernetes services go away, in order to make appropriate use of Prometheus' staleness handling.
- Long-term metrics storage.

We hope you enjoyed this summary of how and why we rewrote Conduit's telemetry pipeline to better make use of Prometheus. Want to try the results for yourself? [Follow the Conduit quickstart](https://conduit.io/getting-started/) and get service metrics on any Kubernetes 1.8+ app in about 60 seconds.

If you like this sort of thing, please come get involved! Conduit is open source, everything is up on the [GitHub repo](https://github.com/runconduit/conduit), and we love new contributors.
Hop into the [conduit-users](https://groups.google.com/forum/#!forum/conduit-users), [conduit-dev](https://groups.google.com/forum/#!forum/conduit-dev), and [conduit-announce](https://groups.google.com/forum/#!forum/conduit-announce) mailing lists, the [#conduit Slack channel](https://slack.linkerd.io/), and take a look at the issues marked “[help wanted](https://github.com/runconduit/conduit/labels/help%20wanted)”. For more details on upcoming Conduit releases, check out our [Public Roadmap](https://conduit.io/roadmap/). - -[proto]: https://github.com/runconduit/conduit/blob/v0.2.0/proto/proxy/telemetry/telemetry.proto diff --git a/linkerd.io/content/blog/real-world-microservices-when-services-stop-playing-well-and-start-getting-real.md b/linkerd.io/content/blog/real-world-microservices-when-services-stop-playing-well-and-start-getting-real.md deleted file mode 100644 index 894e8dd7f9..0000000000 --- a/linkerd.io/content/blog/real-world-microservices-when-services-stop-playing-well-and-start-getting-real.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -slug: 'real-world-microservices-when-services-stop-playing-well-and-start-getting-real' -title: 'Real World Microservices: When Services Stop Playing Well and Start Getting Real' -aliases: - - /2016/05/04/real-world-microservices-when-services-stop-playing-well-and-start-getting-real/ -author: 'oliver' -date: Wed, 04 May 2016 22:25:41 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_featured_PLAYING_WELL.png -tags: [Article, buoyant, Education, Linkerd, linkerd] ---- - -Microservices allow engineering teams to move quickly to grow a product… assuming they don’t get bogged down by the complexity of operating a distributed system. In this post, I’ll show you how some of the hardest operational problems in microservices—staging and canarying of deep services—can be solved by introducing the notion of *routing* to the traffic layer. Looking back at my time as an infrastructure engineer at Twitter (from 2010 to 2015), I now realize that we were “doing microservices”, though we didn’t have that vocabulary at the time. (We used what I now understand to be a bad word—_SOA_). Buzzwords aside, our motivations were the same as those doing microservices today. We needed to allow engineering teams to operate independently—to control their own deploy schedules, on call rotations, availability, and scale. These teams needed the flexibility to iterate and scale quickly and independently—without taking down the site. Having worked on one of the world’s largest microservice applications through its formational years, I can assure you that microservices are not magical scaling sprinkles—nor flexibility, nor security, nor reliability sprinkles. It’s my experience that they are considerably more difficult to operate than their monolithic counterparts. The tried and true tools we’re used to—configuration management, log processing, strace, tcpdump, etc—prove to be crude and dull instruments when applied to microservices. In a world where a single request may touch hundreds of services, each with hundreds of instances, where do I run tcpdump? Which logs do I read? If it’s slow, how do I figure out why? When I want to change something, how do I ensure these changes are safe? - -{{< tweet 651897353889259520 >}} - -When Twitter moved to microservices, it had to expend hundreds (thousands?) of staff-years just to reclaim operability. 
If every organization had to put this level of investment into microservices, the vast majority of these projects would simply fail. Thankfully, over the past few years, open source projects have emerged to ease some of the burden of microservice operations: projects that abstract the details of datacenters and clouds, or offer visibility into a system's runtime state, or make it easier to write services. But this still isn't a complete picture of what's needed to operate microservices at scale. While there are a variety of good tools that help teams go from source code to artifact to cloud, operators don't have nearly enough control over how these services *interact* once they're running. At Twitter, we learned that we need tools that operate on the communication between services—[_RPC_](https://monkey.org/~marius/redux.html).

It's this experience that motivated [Linkerd](https://linkerd.io/) (pronounced "linker dee"), a *service mesh* designed to give service operators command & control over traffic between services. This encompasses a variety of features including [transport security]({{< relref "transparent-tls-with-linkerd" >}}), [load balancing]({{< relref "beyond-round-robin-load-balancing-for-latency" >}}), multiplexing, timeouts, retries, and routing. In this post, I'll discuss Linkerd's approach to routing.

Classically, routing is one of the problems addressed at Layers 3 and 4—TCP/IP—with hardware load balancers, BGP, DNS, iptables, etc. While these tools still have a place in the world, they're difficult to extend to modern multi-service software systems. Instead of operating on connections and packets, we want to operate on requests and responses. Instead of IP addresses and ports, we want to operate on services and instances. In fact, we've found request routing to be a versatile, high-leverage tool that can be employed to solve some of the hardest problems that arise in microservices, allowing production changes to be safe, incremental, and controllable.

## ROUTING IN LINKERD

Linkerd doesn't need to be configured with a list of clients. Instead it *dynamically routes* requests and provisions clients as needed. The basic mechanics of routing involve three things:

- a *logical name*, describing a request
- a *concrete name*, describing a service (i.e. in service discovery)
- and a *delegation table* (dtab), describing the mapping of logical to concrete names.

Linkerd assigns a *logical name* to every request it processes, for example `/svc/users/add`, `/http/1.1/GET/users/add` or `/thrift/userService/addUser`. Logical names describe information relevant to the application but not its infrastructure, so they typically do not describe any details about service discovery (e.g. etcd, consul, ZooKeeper), environment (e.g. prod, staging), or region (e.g. us-central-1b, us-east-1). These sorts of details are encoded in *concrete names*. Concrete names typically describe a service discovery backend like ZooKeeper, etcd, consul, DNS, etc. For example:

- `/$/inet/users.example.com/8080` names an inet address.
- `/io.l5d.k8s/default/thrift/users` names a kubernetes service.
- `/io.l5d.serversets/users/prod/thrift` names a ZooKeeper serverset.

This "namer" subsystem is pluggable so that it can be extended to support arbitrary service discovery schemes.

## DELEGATION

The distinction between logical and concrete names offers two real benefits:

1. Application code is focused on business logic–users, photos, tweets, etc–and not operational details
2. Backends can be determined contextually and, with the help of *namerd*, dynamically.

The mapping from logical to concrete names is described by a delegation table, or [_Dtab_](https://linkerd.io/doc/dtabs/). For example, Linkerd can assign names to HTTP requests in the form `/http/1.1/{method}/{host}` using the `io.l5d.methodAndHost` identifier. Suppose we configure Linkerd as follows:

```yaml
namers:
  - kind: io.l5d.experimental.k8s
    authTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token

routers:
  - protocol: http
    servers:
      - port: 4140
    identifier:
      kind: io.l5d.methodAndHost
    dstPrefix: /http
    dtab: |
      /srv => /io.l5d.k8s/default/http ;
      /host => /srv ;
      /http/1.1/* => /host ;
```

In this configuration, a logical name like `/http/1.1/GET/users` is delegated to the concrete name `/io.l5d.k8s/default/http/users` through rewrites:

{{< table >}}
| From | Delegation | To |
|------|------------|----|
| `/http/1.1/GET/users` | `/http/1.1/* => /host` | `/host/users` |
| `/host/users` | `/host => /srv` | `/srv/users` |
| `/srv/users` | `/srv => /io.l5d.k8s/default/http` | `/io.l5d.k8s/default/http/users` |
{{< /table >}}

Finally, the concrete name, `/io.l5d.k8s/default/http/users`, addresses a service discovery system—in this case, the Kubernetes master API. The *io.l5d.k8s* namer expects names in the form *namespace* / *port* / *service*, so Linkerd load balances over the addresses on the *http* port of the *users* service in the *default* namespace. Multiple namers may be combined to express logic such as *find this service in ZooKeeper, but if it's not there fall back to the local filesystem*:

```yaml
namers:
  - kind: io.l5d.fs
    rootDir: /path/to/services
  - kind: io.l5d.serversets
    zkAddrs:
      - host: 127.0.0.1
        port: 2181

routers:
  - protocol: http
    servers:
      - port: 4140
    identifier:
      kind: io.l5d.methodAndHost
    dstPrefix: /http
    dtab: |
      /srv => /io.l5d.fs ;
      /srv => /io.l5d.serversets/path/to/services ;
      /host => /srv ;
      /http/1.1/* => /host ;
```

The `/srv` delegations are combined to construct a fallback so that if a serverset cannot be found, lookups will be performed against the filesystem namer.

### PER-REQUEST OVERRIDES

This concept of contextual resolution can be extended to alter how *individual requests* are routed. Suppose you want to stage a new version of a service and you want to get an idea of how the application will behave with the new version. Assume that this service isn't directly user-facing, but has other services that call it—a "users" service is generally a good example. You have a few options:

1. Just deploy it to production. #YOLO
2. Deploy staging versions of all of the services that call your service.

{{< fig
  alt="requests"
  title="requests"
  src="/uploads/2017/07/buoyant-staging-users-v2.png" >}}

Neither of these options is particularly manageable. The former causes user-facing problems. The latter becomes complex and cumbersome—you may not have the access or tooling needed to deploy new configurations of all of the services that call you… Happily, the routing capabilities we have with Linkerd allow us to do ad-hoc staging! We can extend the delegation system described above on an individual request to stage a new version of the users service without changing any of its callers.
For example:

```bash
curl -H 'l5d-dtab: /host/users=>/srv/users-v2' https://example.com/
```

This would cause all services that would ordinarily send requests to `/srv/users` to instead send requests to `/srv/users-v2`. Only on this request. Across all services! And this isn't just limited to curl commands: this sort of thing can also easily be supported by [browser plugins](https://chrome.google.com/webstore/detail/modheader/idgpnmonknjnojddfkpgkljpfnnfcklj). This approach greatly reduces the overhead of staging new versions of services in a complex microservice architecture.

## DYNAMIC ROUTING WITH NAMERD

I've described how we can configure Linkerd with a static delegation table. But what if we want to change routing policy at runtime? What if we want to use an approach similar to the one we used for staging to support "canary" or "blue-green" deploys? Enter *namerd*. [namerd](https://github.com/linkerd/linkerd/tree/master/namerd) is a service that allows operators to manage delegations. It fronts service discovery systems so that Linkerd does not need to communicate with service discovery directly—Linkerd instances resolve names through namerd, which maintains a view of service discovery backends.

{{< fig
  alt="namerd"
  title="namerd"
  src="/uploads/2017/07/buoyant-namerd.png" >}}

namerd is [configured][config] with:

- A (pluggable) storage backend, e.g. ZooKeeper or etcd.
- "Namers" that inform namerd how to perform service discovery.
- Some external interfaces–usually a control interface so that operators may update delegations, and a sync interface for Linkerd instances.

Linkerd's configuration is then simplified to be something like the following:

```yaml
routers:
  - protocol: http
    servers:
      - port: 4180
    interpreter:
      kind: io.l5d.namerd
      namespace: web
      dst: /$/inet/namerd.example.com/4290
    identifier:
      kind: io.l5d.methodAndHost
    dstPrefix: /http
```

And namerd has a configuration like:

```yaml
# pluggable dtab storage -- for this example we'll just use an in-memory version.
storage:
  kind: io.buoyant.namerd.storage.inMemory

# pluggable namers (for service discovery)
namers:
  - kind: io.l5d.fs
    ...
  - kind: io.l5d.serversets
    ...

interfaces:
  # used by linkerds to receive updates
  - kind: thriftNameInterpreter
    ip: 0.0.0.0
    port: 4100

  # used by `namerctl` to manage configuration
  - kind: httpController
    ip: 0.0.0.0
    port: 4180
```

Once namerd is running and Linkerd is configured to resolve through it, we can use the [`namerctl`](https://github.com/linkerd/namerctl) command-line utility to update routing dynamically. When namerd first starts, we create a basic [dtab](https://linkerd.io/doc/dtabs/) (called *web*) as follows:

```bash
$ namerctl dtab create web - <<EOF
/srv => /io.l5d.fs ;
/srv => /io.l5d.serversets/path/to/services ;
/host => /srv ;
/http/1.1/* => /host ;
EOF
```

For example, to "canary test" our *users-v2* service, we might send 1% of real production traffic to it:

```bash
$ namerctl dtab update web - <<EOF
/srv => /io.l5d.fs ;
/srv => /io.l5d.serversets/path/to/services ;
/host => /srv ;
/http/1.1/* => /host ;
/host/users => 1 * /srv/users-v2 & 99 * /srv/users ;
EOF
```

We can control how much traffic the new version gets by altering weights. For instance, to send 25% of *users* traffic to *users-v2*, we update namerd with:

```bash
$ namerctl dtab update web - <<EOF
/srv => /io.l5d.fs ;
/srv => /io.l5d.serversets/path/to/services ;
/host => /srv ;
/http/1.1/* => /host ;
/host/users => 1 * /srv/users-v2 & 3 * /srv/users ;
EOF
```

Finally, when we're happy with the performance of the new service, we can update namerd to prefer the new version as long as it's there, but to fall back to the original version should it disappear:

```bash
$ namerctl dtab update web - <<EOF
/srv => /io.l5d.fs ;
/srv => /io.l5d.serversets/path/to/services ;
/host => /srv ;
/http/1.1/* => /host ;
/host/users => /srv/users-v2 | /srv/users ;
EOF
```

Unlike Linkerd, namerd is still a fairly new project. We're iterating quickly to make sure it's easy to operate and debug. As it matures, it will give operators a powerful tool to control their services at *runtime*. It can be integrated with deployment tools to do safe, gradual, managed rollouts (and rollbacks) of new features. It will help teams move features out of a monolith into microservices. And it will improve the debuggability of systems. I've seen first-hand how powerful traffic-level tooling can be, and I'm excited to introduce these features to the open source community. Just like Linkerd, namerd is open source under the Apache License v2. We're excited about releasing it to the community, and we hope you get involved with what we're building at Buoyant. It's going to be awesome.

## TRY IT FOR YOURSELF

We've published the [linkerd-examples][examples] repository with examples of how to run Linkerd & namerd on [Kubernetes][k8s] and [Mesos + Marathon][marathon]. These repositories should have everything you need to get up and routing. If you have any questions along the way, please don't hesitate to ask us on [slack.linkerd.io](http://slack.linkerd.io/).

## UPDATE OCTOBER 2016

Fixed the repo links above.

[config]: https://github.com/linkerd/linkerd/blob/master/namerd/docs/config.md
[examples]: https://github.com/linkerd/linkerd-examples
[k8s]: https://github.com/linkerd/linkerd-examples/tree/master/getting-started/k8s
[marathon]: https://github.com/linkerd/linkerd-examples/tree/master/dcos

diff --git a/linkerd.io/content/blog/small-memory-jvm-techniques-for-microservice-sidecars.md b/linkerd.io/content/blog/small-memory-jvm-techniques-for-microservice-sidecars.md
deleted file mode 100644
index 7e74e64768..0000000000
--- a/linkerd.io/content/blog/small-memory-jvm-techniques-for-microservice-sidecars.md
+++ /dev/null
@@ -1,155 +0,0 @@
---
slug: 'small-memory-jvm-techniques-for-microservice-sidecars'
title: 'Squeezing blood from a stone: small-memory JVM techniques for microservice sidecars'
aliases:
  - /2016/06/17/small-memory-jvm-techniques-for-microservice-sidecars/
author: 'steve'
date: Fri, 17 Jun 2016 22:34:27 +0000
draft: false
featured: false
thumbnail: /uploads/linkerd_featured_sidecars.png
tags: [Article, buoyant, Education, linkerd]
---

In this post, we'll describe how we reduced the memory footprint of [Linkerd](https://linkerd.io/), our JVM-based *service mesh* for cloud-native applications, by almost *80%*—from 500mb to 105mb—by tuning the JVM's runtime parameters. We'll describe why we went through this painful exercise, and the various things that did—and didn't—help us get there.

## Sidecars for Microservices

With the rise of microservices, a new deployment idiom is gaining in popularity: the "sidecar" (or "co-") process.
A sidecar sits alongside an application instance and provides additional functionality. Because it runs as a separate process, it's decoupled from the implementation details of the application. Unlike a library, a sidecar doesn't require the application to be written in a specific language or framework.

{{< fig
  alt="service instance"
  title="Typical sidecar deployment model"
  src="/uploads/2017/07/buoyant-sidecar.png" >}}

This decoupling is highly useful in microservices, because it's a way for functionality to be shared across services, while still allowing individual services to be written in the language and framework most suited to the task at hand—as opposed to libraries, which constrain these choices. One of the strongest proponents of sidecars for this reason is Ben Christensen, Facebook (and formerly Netflix) engineer and author of [Hystrix](https://github.com/Netflix/Hystrix), who compares the heavy use of binary libraries in a microservices environment to ["building a distributed monolith"](https://www.microservices.com/talks/dont-build-a-distributed-monolith/).

So, the upside of the sidecar model is that it provides a consistent layer of functionality across all services without restricting design choices. The downside, of course, is that every instance of a sidecar consumes additional resources when deployed—memory, CPU, and disk.

[Linkerd](https://linkerd.io/), our service mesh for cloud-native applications, is often deployed as a sidecar. Unfortunately for this deployment model (but fortunately for other reasons!), Linkerd is built on the JVM, which is not exactly known for its conservative resource profile—especially around memory. Additionally, the building blocks upon which Linkerd is built, [Finagle](http://finagle.github.io/) and [netty](http://netty.io/), have been extensively tuned and tested at production scale—but primarily in large-memory environments.

During our early work on Linkerd, we recommended 500mb as a healthy amount of memory for most workloads. However, while this was reasonable for per-host deployments, it became clear that it was too costly for most sidecar deployments.

## The Challenge

To make Linkerd a more viable sidecar, we were faced with a challenge: could we take this JVM process, tuned and tested for large-memory, large-CPU environments, and squeeze it down into a resource profile that's more palatable for microservices?

We started by trying to define the problem space. Since each Linkerd instance acts as a stateless HTTP and RPC proxy, its tasks—[request-level load balancing][balancing], [routing][routing], [instrumentation][instrumentation], [error and failure handling][failures], and [distributed tracing][tracing]—don't require disk. The resources that Linkerd consumes to drive network traffic are primarily CPU and memory.

Additionally, there are two metrics of Linkerd performance that we really care about: *latency*, especially tail latency, and *throughput*. In the JVM, as in most GC-based network systems, these things are all tightly related: CPU usage, memory footprint, latency, and throughput all either affect or are affected by one another.

On top of this, we imposed one inviolable constraint: *the p99 latency of Linkerd must never break 1ms, and the p999 latency must never break 5ms*. That is, for 99% of requests, time spent in Linkerd must be less than or equal to 1ms, and for another 0.9% of requests, time spent in Linkerd must be less than 5ms. Any configuration that exceeded these constraints would make Linkerd too slow, no matter how lightweight it was.

Since the JVM is a notorious memory hog, we knew that memory footprint would probably be the most difficult resource to optimize. To really push the envelope, we decided to target a *100mb memory footprint*, as measured by resident set size (RSS). For those of you who live in the world of C programs, this number may seem ridiculously large. For those of you who have operated the JVM at scale, it's laughably small.

Finally, we took advantage of one big feature of the sidecar deployment model: since we knew each Linkerd instance would be paired with an individual service instance, we didn't have to support total service throughput—_just that of a single instance_. Thus, the throughput requirements could be very lax, by Linkerd standards.

From previous experiments, we knew that an individual Linkerd instance could saturate a 1GB Ethernet link with proxied traffic (roughly 40k HTTP requests per second (rps) proxied in and out). For a sidecar process, however, we could set the constraint much lower. We decided to target a measly *1,000 rps*.

## The Goal

The challenge was now in focus. Could we tune this JVM-based, Finagle- and netty-powered beast, born in a world of 32-core, 8-gig, bare metal machines, to operate in the confines of:

- a 100mb memory footprint (!), as measured by RSS;
- p99 of <= 1ms;
- p999 of <= 5ms; and
- "not excessive" CPU utilization;

… all while hitting 1,000 proxied HTTP rps?

## Results

After several weeks of effort, we were able to meet all our goals *except* memory footprint, where we ended 15% above goal. Still, not bad! In short, after this work, **the upcoming 0.7.1 release of Linkerd can serve 1,000 HTTP requests per second with a p99 latency of < 1ms and p999 of <5ms, in 115mb of total memory, on commodity cloud VM instances.** At a 10th of that volume, 100 rps, it sits at 105mb, just a hair over our goal. And at 40k rps—enough to saturate a 1GB Ethernet card with HTTP proxy traffic—Linkerd requires less than 300mb of memory.

We found two specific techniques to be the most beneficial: turning off one of the two JIT compilers enabled by default (the "C2" compiler), and using a 32-bit, rather than a 64-bit, JVM.

Here's a chart of Linkerd's memory footprint at various rps levels, under the old ("64-bit + C2") and new ("32-bit + C1") conditions:

{{< fig
  alt="footprint"
  title="Footprint"
  src="/uploads/2017/07/buoyant-linkerd-memory-footprint-chart.png" >}}

## Discussion

Of everything that we tried, switching to a 32-bit JVM made the most dramatic difference.

What makes the 64-bit JVM so expensive? One might think it's due to pointer width, but the JVM has defaulted to [compressed pointers][compressed pointers] for application objects since Java 6u23. The answer, for Linkerd, actually lies in the native libraries loaded by the JVM at startup time. Since these libraries exist outside of the JVM, pointer compression can't affect them. And since the Linkerd application code itself is actually fairly conservative in terms of memory needed, the cost of loading these 64-bit native libraries as part of the baseline JVM footprint dominates everything else.

Happily, since Linkerd doesn't actually need to address 2GB of memory under any of the conditions we tested, we effectively lose nothing by moving to the 32-bit JVM. (If anything, we gain memory bandwidth.)
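For the curious, the two changes described above mostly boil down to which `java` binary you launch and a couple of flags. A rough sketch (the JVM path, heap sizes, and jar name are placeholders; in practice Linkerd's bundled launch script is where these would be set):

```bash
# Launch on a 32-bit JRE, and stop tiered compilation at level 1 so only the
# C1 JIT compiler is used (i.e., no C2).
/usr/lib/jvm/java-8-openjdk-i386/bin/java \
  -XX:+TieredCompilation -XX:TieredStopAtLevel=1 \
  -Xms32M -Xmx128M \
  -jar linkerd.jar path/to/linkerd.yml
```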
- -Once we were on the 32-bit JVM, the next biggest impact on memory use was felt by removing the C2 JIT compiler. The JVM’s JIT compilers run as a hierarchy, with the C1 and C2 compilers running different kinds of optimizations. After some experimentation, we found that turning off C2 was helpful in reducing usage by 15-25mb without any substantial effect on latency or CPU. - -These two options, when combined, were sufficient to reduce memory footprint by 80% for our target 1k RPS condition. - -## A Maze of Twisty Passages - -In the course of our experiments, we also tried many things that we ended up rejecting—either because they didn’t affect the memory usage, or they made it worse, or they helped but made other aspects of performance worse. - -Below is a sampling of some of the more interesting things we tried. - -### COMPACT PROFILES - -The JVM advertises several levels of “compact profiles” designed to reduce the size of the JVM image itself. These are primarily useful for embedded devices that e.g. store the JVM in flash memory. We experimented with these profiles and found no significant impact on runtime memory footprint. - -However, when using Linkerd in a Docker container, these options were useful for reducing the size of the container image itself. Of the several levels of compact profiles available, `compact3` is the smallest that still provides all the JVM classes required to run Linkerd. We found no performance impact in using `compact3` vs a normal 32-bit JVM. - -The main drawback to using `compact3` is that it doesn’t come with most of the debugging tools one expects from a full JVM, like `jstat`, `jstack`, `jmap`, etc. If you rely on these tools for production debugging, you may want to use the standard 32-bit JDK. (Linkerd itself exports many JVM stats and has a profiler built-in, so you may not be as reliant on JVM tooling as you might expect.) - -### SHRINKING THE STACK - -Linkerd actually doesn’t make heavy use of the stack. In our experiments, shrinking the stack didn’t significantly lower memory usage. - -### REDUCING THE NUMBER OF JVM INTERNAL THREADS - -On an 8-core machine, the JVM will typically start 8 GC threads and 8 JIT compiler threads. You can tune those down using JVM and Linkerd flags. We found that reducing these made latency less predictable, and didn’t significantly improve memory use. We reverted to default settings, allowing the JVM to “tune itself” to run more threads to fit the host’s resources and let the OS scheduler schedule these threads. - -### TURNING OFF THE JIT ENTIRELY - -We tried turning off both JIT compilers and only relying on the bytecode interpreter. This resulted in a substantial reduction on memory footprint, but at the expense of making latency extremely poor and very high variance. We quickly undid these changes. - -### TURNING OFF THE BYTECODE VERIFIER - -Disabling the bytecode verifier reduced CPU utilization by about 15% in our tests. - -However, the verifier protects against malicious bytecode and also against bytecode generation bugs. We felt that disabling the verifier could potentially result in a mysteriously unstable production service, so we ultimately decided to keep it on. - -## A note on high throughput instances - -While the new settings allowed Linkerd to scale its own memory consumption without requiring workload-specific tuning, we found that this graceful behavior broke down around 20k rps. 
Thus, if you're planning to serve more than 20k HTTP rps from a single Linkerd instance, we currently recommend setting the environment flags `JVM_HEAP_MIN` and `JVM_HEAP_MAX` to `256mb` for a 32-bit JVM (or `512mb` for a 64-bit JVM) to maintain performance.

(In future releases, these parameters may be encoded in a set of QPS-specific options for Linkerd.)

## Trying this at home

In order to run Linkerd in your own environment following the recommendations above, generally speaking, we recommend running Linkerd under the [32-bit build of OpenJDK 8](http://packages.ubuntu.com/xenial/i386/openjdk-8-jdk/download).

Of course, running a 32-bit JVM on a 64-bit architecture isn't quite as simple as just installing it. You have to first install the space-saving 32-bit libraries. Here are a few guides to help you along:

- [How to run 32-bit binaries on a 64-bit Debian](https://wiki.debian.org/Multiarch/HOWTO)
- [How to run 32-bit binaries on a 64-bit Ubuntu 14.04 LTS](http://askubuntu.com/questions/454253/how-to-run-32-bit-app-in-ubuntu-64-bit/454254#454254)

As described in the *Compact Profiles* section above, if you're running Linkerd in a Docker image, you may also want to consider a [`compact3` JRE build](https://github.com/ofayau/docker-openjdk/blob/master/openjdk-compact3/Dockerfile). Alternatively, we publish Docker images with `compact3` set that you can obtain by pulling:

```bash
docker pull buoyantio/linkerd:latest
```

Finally, if you want to reproduce the results above, all tests were done by sending sustained load over multiple hours to a pre-release version of Linkerd 0.7.1, using 8-core Debian 8 VMs running on Google Compute Engine. The load tester, proxy, and backends all ran on separate hosts to reduce resource contamination.

Good luck! As always, feel free to [reach out to us for help](https://linkerd.io/help/).

## Conclusion

In the upcoming 0.7.1 release of Linkerd, memory consumption is dramatically reduced out of the gate—and generally shouldn't require tuning for workloads under 20k rps. If you're running Linkerd as a sidecar and doing <= 1k rps per instance, Linkerd should handle sustained load with around 115mb of memory.

## Acknowledgements

Thanks to [Marius Eriksen](https://twitter.com/marius) and [William Morgan](https://twitter.com/wm) for reviewing early drafts of this post. Thanks to [Brandon Mitchell](https://twitter.com/bitbckt) of [FaunaDB](https://fauna.com/) for sharing his knowledge of HotSpot internals with me. Thanks also to [David Pollak](http://blog.goodstuff.im/) for suggesting that I try a 32-bit JVM for small heaps many years ago.
- -[compressed pointers]: http://docs.oracle.com/javase/7/docs/technotes/guides/vm/performance-enhancements-7.html#compressedOop -[balancing]: https://linkerd.io/features/load-balancing/ -[routing]: https://linkerd.io/features/routing/ -[instrumentation]: https://twitter.com/linkerd/status/720410954610049025 -[failures]: https://linkerd.io/doc/0.6.0/linkerd/retries/ -[tracing]: /2016/05/17/distributed-tracing-for-polyglot-microservices/ diff --git a/linkerd.io/content/blog/transparent-tls-with-linkerd.md b/linkerd.io/content/blog/transparent-tls-with-linkerd.md deleted file mode 100644 index 307ffe6d8f..0000000000 --- a/linkerd.io/content/blog/transparent-tls-with-linkerd.md +++ /dev/null @@ -1,141 +0,0 @@ ---- -slug: 'transparent-tls-with-linkerd' -title: 'Transparent TLS with Linkerd' -aliases: - - /2016/03/24/transparent-tls-with-linkerd/ -author: 'alex' -date: Thu, 24 Mar 2016 22:16:06 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_featured_transparent.png -tags: [Article, Education, Linkerd, linkerd, tutorials] ---- - -In this post, we describe how Linkerd, our *service mesh* for cloud-native applications, can be used to transparently “wrap” HTTP and RPC calls in TLS, adding a layer of security to applications without requiring modification of application code. - -**NOTE:** we have an [updated version of this post][part-iii]. - -[Linkerd](https://linkerd.io/) includes client-side load balancing as one of its core features. In its basic form, outgoing HTTP and RPC calls from a service are proxied through Linkerd, which adds service discovery, load balancing, instrumentation, etc., to these calls. - -However, as a service mesh, Linkerd can additionally be used to handle inbound HTTP and RPC calls. In other words, Linkerd can act as both a proxy and a reverse proxy. This is the full service mesh deployment model, and it has some nice properties—in particular, when Linkerd is deployed on a host, or as a sidecar in systems like Kubernetes, it allows Linkerd to *modify* or *upgrade* the protocol over the wire. One particularly exciting use case for a service mesh is to automatically add TLS across host boundaries. - -Adding TLS directly to an application can be difficult, depending on the level of support for it in an application’s language and libraries. This problem is compounded for polyglot multi-service applications. By handling TLS in Linkerd, rather than the application, you can encrypt communication across hosts without needing to modify application code. Additionally, for multi-service applications, you get a uniform application-wide layer for adding TLS—helpful for configuration changes, monitoring, and security auditing. - -In the example below, we’ll “wrap” a simple Kubernetes application in TLS via Linkerd. We’ll take advantage of the fact that Kubernetes’s pod model colocates containers in a pod on the same host, ensuring that the unencrypted traffic between your service and its sidecar Linkerd process stays on the same host, while all traffic across pods (and thus across machines) is encrypted. - -{{< fig - src="/uploads/2017/07/buoyant-l2l-diagram.png" - alt="tls diagram" - title="TLS">}} - -Of course, encryption is only one part of TLS–authentication is also important. 
Linkerd supports several TLS configurations: - -- no validation (insecure) -- a site-wide certificate for all services -- per-service or per-environment certificates - -In this example, we will focus on the certificate per-service setup, since this is most appropriate for production use cases. We will generate a root CA certificate, use it to generate and sign a certificate for each service in our application, distribute the certificates to the appropriate pods in Kubernetes, and configure Linkerd to use the certificates to encrypt and authenticate inter-pod communication. - -We’ll assume that you already have Linkerd deployed to Kubernetes. If not, check out our [Kubernetes guide](https://linkerd.io/doc/0.2.1/k8s) first. - -## GENERATING CERTIFICATES - -To begin, we’ll need a root CA certificate and key that we can use to generate and sign certificates for each of our services. This can be generated using [openssl](https://www.openssl.org/) (the commands below assume that you have an openssl.cnf config file in the directory where you’re running them — see [this gist](https://gist.github.com/klingerf/d43738ac98b6bf0479c47987977a7782) for a sample version of that file). Create the root CA certificate. - -```bash -openssl req -x509 -nodes -newkey rsa:2048 -config openssl.cnf \ - -subj '/C=US/CN=My CA' -keyout certificates/private/cakey.pem \ - -out certificates/cacertificate.pem -``` - -This will generate your CA key (cakey.pem) and your CA certificate (cacertificate.pem). It is important that you store the CA key in a secure location (do not deploy it to Kubernetes)! Anyone who gets access to this key will be able to generate and sign certificates and will be able to impersonate your services. - -Once you have your root CA certificate and key, you can generate a certificate and key for each service in your application. - -```txt -# generate a certificate signing request with the common name "$SERVICE_NAME" -openssl req -new -nodes -config openssl.cnf -subj "/C=US/CN=$SERVICE_NAME" \ - -keyout certificates/private/${SERVICE_NAME}key.pem \ - -out certificates/${SERVICE_NAME}req.pem - -# have the CA sign the certificate -openssl ca -batch -config openssl.cnf -keyfile certificates/private/cakey.pem \ - -cert certificates/cacertificate.pem \ - -out certificates/${SERVICE_NAME}certificate.pem \ - -infiles certificates/${SERVICE_NAME}req.pem -``` - -Here we use the Kubernetes service name as the TLS common name. - -## DISTRIBUTING CERTIFICATES - -Now that we have certificates and keys, we need to distribute them to the appropriate pods. Each pod needs the certificate and key for the service that is running there (for serving TLS) as well as the root CA certificate (for validating the identity of other services). Certificates and keys can be distributed using Kubernetes secrets, [just like Linkerd configs](https://linkerd.io/doc/0.2.1/k8s). Example secret: - -```yml ---- -kind: Secret -apiVersion: v1 -metadata: - name: certificates -namespace: prod -type: Opaque -data: - certificate.pem: $BASE_64_ENCODED_CERT - key.pem: $BASE_64_ENCODED_KEY - cacertificate.pem: $BASE_64_ENCODED_CACERT -``` - -## CONFIGURING LINKERD - -Finally, we need to configure Linkerd to use the certificates. To set this up, start with a [service mesh deployment](https://linkerd.io/in-depth/deployment). 
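(A quick aside before the Linkerd config: if you haven’t yet loaded the generated PEM files into the `certificates` Secret shown above, here is a minimal sketch of one way to do it with `kubectl`. The `certificates/` directory layout and the `prod` namespace are carried over from the commands above, and `$SERVICE_NAME` is whatever placeholder you used when signing the certificate, so adjust names and paths to match your setup.)

```bash
# Sanity-check that the service certificate chains back to the root CA
openssl verify -CAfile certificates/cacertificate.pem \
  certificates/${SERVICE_NAME}certificate.pem

# Package the PEMs into the "certificates" Secret described above;
# kubectl base64-encodes the file contents for you.
kubectl create secret generic certificates --namespace=prod \
  --from-file=certificate.pem=certificates/${SERVICE_NAME}certificate.pem \
  --from-file=key.pem=certificates/private/${SERVICE_NAME}key.pem \
  --from-file=cacertificate.pem=certificates/cacertificate.pem
```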
Add a [server tls config](https://linkerd.io/config/1.1.1/linkerd/index.html#server-tls) to the incoming router and a boundPath [client tls module](https://linkerd.io/config/1.1.1/linkerd/index.html#client-tls) to the outgoing router:

```yml
---
namers:
- kind: io.l5d.experimental.k8s
  prefix: /ns
  host: localhost
  port: 8001

routers:
- protocol: http
  label: incoming
  servers:
  - port: 4140
    ip: 0.0.0.0
    # accept incoming TLS traffic from remote Linkerd
    tls:
      certPath: /certificates/certificate.pem
      keyPath: /certificates/key.pem
  dtab: |
    /svc => /$/inet/127.1/8080;

- protocol: http
  label: outgoing
  client:
    # sends outgoing TLS traffic to remote Linkerd
    tls:
      kind: io.l5d.clientTls.boundPath
      caCertPath: /certificates/cacertificate.pem
      names:
      - prefix: '/ns/*/*/{service}'
        commonNamePattern: '{service}'
  servers:
  - port: 4141
    ip: 0.0.0.0
  dtab: |
    /srv => /ns/prod/router;
    /svc => /srv;
```

The server TLS section configures the incoming router to serve TLS using the service’s certificate and key. The boundPath client TLS section configures the outgoing router to validate the identity of the services that it talks to. It pulls the service name from the destination bound path, uses that as the TLS common name, and uses the CA certificate to verify the legitimacy of the remote service. To see how that works, let’s walk through an example:

Suppose that `ServiceA` wants to send a request to `ServiceB`. To do this, `ServiceA` sends the request to the outgoing router of its sidecar Linkerd, which is listening on `localhost:4141`. `ServiceA` also sends a `Host: ServiceB` header to indicate where the request should be routed. When Linkerd receives this request, it generates `/svc/ServiceB` as the destination. Applying the [dtab](https://linkerd.io/doc/dtabs/), this gets rewritten to `/ns/prod/router/serviceB`. This is called the *bound path*. Since this matches the prefix we specified in the boundPath TLS module, Linkerd will send this request using TLS. The k8s namer will then resolve `/ns/prod/router/serviceB` to a list of concrete endpoints where the incoming routers of `ServiceB`’s sidecar Linkerd instances are listening (and are configured to receive TLS traffic).

That’s it! Inter-service communication run through Linkerd will now be secured using TLS, and no changes to your application are necessary. And, of course, just as in non-TLS configurations, Linkerd adds connection pooling, load balancing, uniform instrumentation, and powerful routing capabilities to your services, helping them scale to high-traffic, low-latency environments.

## ACKNOWLEDGMENTS

Thanks to [Sarah Brown](https://twitter.com/esbie) and [Greg Campbell](https://twitter.com/gtcampbell) for feedback on earlier drafts of this post.
- -[part-i]: {{< ref "a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}} [part-ii]: {{< ref "a-service-mesh-for-kubernetes-part-ii-pods-are-great-until-theyre-not" >}} [part-iii]: {{< ref "a-service-mesh-for-kubernetes-part-iii-encrypting-all-the-things" >}} [part-iv]: {{< ref "a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting" >}} [part-v]: {{< ref "a-service-mesh-for-kubernetes-part-v-dogfood-environments-ingress-and-edge-routing" >}} [part-vi]: {{< ref "a-service-mesh-for-kubernetes-part-vi-staging-microservices-without-the-tears" >}} [part-vii]: {{< ref "a-service-mesh-for-kubernetes-part-vii-distributed-tracing-made-easy" >}} [part-viii]: {{< ref "a-service-mesh-for-kubernetes-part-viii-linkerd-as-an-ingress-controller" >}} [part-ix]: {{< ref "a-service-mesh-for-kubernetes-part-ix-grpc-for-fun-and-profit" >}} [part-x]: {{< ref "a-service-mesh-for-kubernetes-part-x-the-service-mesh-api" >}} [part-xi]: {{< ref "a-service-mesh-for-kubernetes-part-xi-egress" >}} diff --git a/linkerd.io/content/blog/upgrading-to-linkerd-0-6-0.md b/linkerd.io/content/blog/upgrading-to-linkerd-0-6-0.md deleted file mode 100644 index bb56ddb0d3..0000000000 --- a/linkerd.io/content/blog/upgrading-to-linkerd-0-6-0.md +++ /dev/null @@ -1,136 +0,0 @@ ---- -slug: 'upgrading-to-linkerd-0-6-0' -title: 'Upgrading to Linkerd 0.6.0' -aliases: - - /2016/05/24/upgrading-to-linkerd-0-6-0/ -author: 'alex' -date: Tue, 24 May 2016 22:33:14 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_v_060.png -tags: [Article, Buoyant, Linkerd, linkerd, News] ---- - -Version 0.6.0 of Linkerd and namerd were released today! We wanted to take the opportunity in this release to bring more consistency and uniformity to our config files. Unfortunately, this means making non-backwards compatible changes. In this post, we describe how to update your config files to work with 0.6.0. - -## KIND NAMES - -Linkerd and namerd use a plugin system where plugins are identified in config files by the `kind` property. We’ve renamed the `kind`s of our plugins to follow a consistent pattern: `.`. We use `io.l5d` as the organization for Linkerd’s built-in plugins. For example, the `kind` of the etcd storage plugin changed from`io.buoyant.namerd.storage.experimental.etcd` to `io.l5d.etcd`. - -**You will need to update the `kind` properties in your configs to their new values**. A full list of the `kind` name changes is below. You can always find more information about plugins in the [Linkerd config docs](https://linkerd.io/doc/0.6.0/linkerd/config/). 
- -```txt -# Identifiers -default -> io.l5d.methodAndHost - -# Response Classifiers -retryableIdempotent5XX -> io.l5d.retryableIdempotent5XX -retryableRead5XX -> io.l5d.retryableRead5XX -nonRetryable5XX -> io.l5d.nonRetryable5XX - -# Client TLS Config -io.l5d.clientTls.boundPath -> io.l5d.boundPath -io.l5d.clientTls.noValidation -> io.l5d.noValidation -io.l5d.clientTls.static -> io.l5d.static - -# Tracers -io.l5d.zipkin -> io.l5d.zipkin - -# Namers -io.l5d.experimental.consul -> io.l5d.consul -io.l5d.fs -> io.l5d.fs -io.l5d.experimental.k8s -> io.l5d.k8s -io.l5d.experimental.marathon -> io.l5d.marathon -io.l5d.serversets -> io.l5d.serversets - -# namerd Interfaces -httpController -> io.l5d.httpController -thriftNameInterpreter -> io.l5d.thriftNameInterpreter - -# namerd Dtab Storage -io.buoyant.namerd.storage.experimental.etcd -> io.l5d.etcd -io.buoyant.namerd.storage.inMemory -> io.l5d.inMemory -io.buoyant.namerd.storage.experimental.k8s -> io.l5d.k8s -io.buoyant.namerd.storage.experimental.zk -> io.l5d.zk -``` - -## EXPERIMENTAL PLUGINS - -Certain plugins have been marked as experimental. While these plugins definitely work, they have not yet been tested at scale so we can’t be sure how they will perform in production environments. **In order to use these plugins, you’ll need to acknowledge their experimental status by setting the `experimental: true` property on the plugin’s config.** For example: - -```yml -kind: io.l5d.k8s -experimental: true # must be set because this plugin is experimental -host: localhost -port: 8001 -``` - -If a plugin is experimental, this will be indicated in the [Linkerd config docs](https://linkerd.io/doc/0.6.0/linkerd/config/). A full list of the currently experimental plugins is below: - -```yml -- namers - io.l5d.consul - io.l5d.k8s - io.l5d.marathon -- storage - io.l5d.k8s - io.l5d.zk - io.l5d.etcd -``` - -## NAMER PATHS - -Namers match on concrete names and bind them to physical addresses (for a fuller explanation of this, see: [https://linkerd.io/doc/dtabs/#namers-addresses][namers]). If a path begins with `/$`, that indicates that it is a concrete name and that the classpath should be searched for a namer to use. For example, `/$/inet/127.0.0.1/4140` searches the classpath for a namer called `inet` and uses it to bind this path. - -In 0.6.0 we added a similar indicator for concrete names that should be bound by namers specified in the config file. If a path begins with `/#`, that indicates that it is a concrete name and a namer from the config file should be used. The result is that dtabs are more readable because it is more obvious which paths can be handled by namers (those starting with `/$` or `/#`) and which require further processing by the dtab entries. - -**This means that all dtab entries that refer to a namer prefix need to be updated to begin with `/#`.** For example, the entry - -```txt -/srv => /io.l5d.fs -``` - -would need to be changed to - -```txt -/srv => /#/io.l5d.fs -``` - -Any path beginning with `/#/io.l5d.fs` means that this is a concrete name and the `io.l5d.fs` namer should be used to bind it. 
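If you have more than a handful of dtab entries to migrate, a scripted rewrite can help. The following is only a sketch, assuming GNU sed, a local config file named `linkerd.yaml`, and the `io.l5d.fs` namer from the example above; adapt the pattern for whichever namers you actually use, and run it only once, since the substitution is not idempotent:

```bash
# Keep a backup, then prefix fs-namer references with /# in the dtab
cp linkerd.yaml linkerd.yaml.bak
sed -i 's|/io.l5d.fs|/#/io.l5d.fs|g' linkerd.yaml

# Review the change before rolling it out
diff -u linkerd.yaml.bak linkerd.yaml
```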
## ZOOKEEPER ADDRESSES

To make the way that ZooKeeper hosts are addressed more consistent, **the ZooKeeper dtab storage plugin config now requires ZooKeeper addresses be specified as follows**:

```yml
zkAddrs:
- host: zkHost1
  port: 1234
- host: zkHost2
  port: 1234
```

## DC/OS

If you have installed namerd via the [official DC/OS universe packages](https://github.com/mesosphere/universe), and are using the `io.l5d.zk` storage plugin, you will need to update any dtabs referencing `/io.l5d.marathon`. This should be done in conjunction with upgrading the Linkerd and namerd DC/OS packages from pre-0.6.0 to 0.6.0 or higher. Specifically, change lines like this:

```txt
/srv => /io.l5d.marathon ;
```

to this:

```txt
/srv => /#/io.l5d.marathon ;
```

To update namerd via [namerctl](https://github.com/linkerd/namerctl), run the following commands:

```bash
export NAMERCTL_BASE_URL=http://namerd.example.com:4180

namerctl dtab get default > default.dtab
DTAB_VERSION=`awk '/# version/{print $NF}' default.dtab`
sed -i -- 's|/io.l5d.marathon|/#/io.l5d.marathon|g' default.dtab
namerctl dtab update --version=$DTAB_VERSION default default.dtab
```

## ADDITIONAL SUPPORT

If you run into any difficulties with this upgrade, or just want to chat, join us in the [Linkerd Slack channel](http://slack.linkerd.io/)!

[namers]: https://linkerd.io/doc/dtabs/#namers-addresses

diff --git a/linkerd.io/content/blog/using-linkerd-as-a-service-mesh-proxy-at-wepay.md deleted file mode 100644 index bfc88275db..0000000000 --- a/linkerd.io/content/blog/using-linkerd-as-a-service-mesh-proxy-at-wepay.md +++ /dev/null @@ -1,149 +0,0 @@

---
slug: 'using-linkerd-as-a-service-mesh-proxy-at-wepay'
title: 'Using Linkerd as a Service Mesh Proxy at WePay'
aliases:
  - /2018/06/26/using-linkerd-as-a-service-mesh-proxy-at-wepay/
author: 'mohsen'
date: Wed, 27 Jun 2018 01:29:57 +0000
draft: false
featured: false
thumbnail: /uploads/WePay-logo.png
tags:
  [
    case study,
    Community,
    GKE,
    Google Cloud Engine,
    kubernetes,
    Linkerd,
    linkerd,
    Linkerd in production,
    Tutorials & How-To's,
  ]
---

_This post originally appeared on [WePay's Engineering Blog](https://wecode.wepay.com/posts/using-l5d-as-a-service-mesh-proxy-at-wepay)._

In the upcoming months, we are going to write a series of posts documenting [WePay Engineering’s](https://wecode.wepay.com/) journey from traditional load balancers to a service mesh on top of [Google’s Kubernetes Engine](https://cloud.google.com/kubernetes-engine/) (GKE).

In this first part of the series, we are going to take a look at some of the routing and load balancing options that we have used before, compare them with the services we have looked at as possible service mesh proxies, and see how they’d change the way our infrastructure operates.

{{< fig
  alt="service mesh sidecar proxy"
  src="/uploads/wepay_image_0.png"
  title="Figure 1: Data plane using sidecar proxy pattern" >}}

Figure 1 shows a simplified version of a [data plane](https://medium.com/microservices-learning/understanding-microservices-communication-and-service-mesh-e888d1adc41), in service mesh terms, where Service X is sending a request to Service Y via its sidecar proxy. Since Service X is sending the request through its proxy, the request is first passed to Service X’s proxy (PX), then sent to Service Y’s proxy (PY) before getting to the destination, Service Y.
In most cases, PX finds PY through a service discovery service, e.g. [Namerd](https://linkerd.io/advanced/namerd/). - -_Our [meetup session about gRPC](https://youtu.be/8KWmNw9jQ04?t=28m59s) talks a bit about using this pattern for proxy load balancing._ - -In this post, to keep things simple, we’re going to focus on the data plane, and to simplify things further, we’re going to only talk about proxies using the [sidecar pattern](https://docs.microsoft.com/en-us/azure/architecture/patterns/sidecar). - -_Side note: all technologies mentioned in this post are very sophisticated pieces of software that have been written by talented engineers and open sourced to be available for other companies with similar use cases. The comparisons below are solely based on WePay’s use cases and which technology fit those use cases best, and it’s not intended to the discredit other technologies mentioned._ - -## Setting the stage - -At WePay, we are currently running many microservices (Sx) in GKE. Some of these microservices talk to other microservices in the same data center, which looks something like this: - -{{< fig - alt="sidecar ssl proxy nginx" - src="/uploads/wepay_image_1.png" - title="Figure 2: Simple load balancing using GKE and NGINX" >}} - -In the model shown in figure 2, Service Y sends a request to Service X, and [Kubernetes’ load balancing object](https://kubernetes.io/docs/concepts/services-networking/service/) does the load balancing for Service X by forwarding the request to X1’s NGINX sidecar. When NGINX receives the request, it terminates SSL and forwards the packet to X1. - -As we have grown the number of microservices in our infrastructure in the past year or so, the following issues have proven to be very important to us, and in some ways, the motivation for our move to a service mesh: - -- Smarter, performant, and concurrent load balancing -- Platform and protocol agnostic routing, with HTTP and HTTP/2 (with focus on gRPC) as requirements -- Application independent routing and tracing metrics -- Traffic security - -Once we knew we wanted to migrate into a service mesh infrastructure, we looked at various different proxies for building our data plane. From the list, [Envoy](https://www.envoyproxy.io/) and [Linkerd](https://linkerd.io/) looked to be the closest to our interests, while offering a mature feature set at the same time. - -_Side note: At the time of research, NGINX had no service mesh support, but in an effort to support the service mesh infrastructure, [NGINX has added Istio support](https://www.nginx.com/press/implementation-nginx-as-service-proxy-istio/). For the purpose of comparison, Envoy and NGINX fall in the same pool._ - -## Better load balancing - -[Envoy][envoy] and [Linkerd][round-robin] both offer access to some of the more sophisticated load balancing algorithms, but Linkerd’s focus on [performance][perf], [tuning][tuning] and the platform’s usage of [Finagle](https://twitter.github.io/finagle/), made it an appealing choice for load balancing. - -{{< fig - alt="sidecar extended" - src="https://wecode.wepay.com/assets/2018-06-11-using-l5d-as-a-service-mesh-proxy-at-wepay/image_2.png" - title="Figure 3: Sidecar proxy pattern handles load balancing" >}} - -Figure 3 shows how a service mesh proxy handles the load balancing using a list of available destinations acquired through service discovery. 
In addition to the basic load balancing features, Linkerd also allows pushing the load balancing closer to the edge of each Kubernetes node with support for [Kubernetes DaemonSets](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/). From a resource allocation perspective, this also significantly lowers the cost of running the proxies in larger clusters.

{{< fig
  alt="daemonset extended"
  src="https://wecode.wepay.com/assets/2018-06-11-using-l5d-as-a-service-mesh-proxy-at-wepay/image_3.png"
  title="Figure 4: DaemonSet proxy pattern" >}}

In figure 4, the DaemonSet pattern shows each Kubernetes cluster node hosting one proxy. When Service Y sends a request to Service Z, the request is handed off to the Sender’s node proxy, which uses service discovery to forward the request to the Receiver’s node proxy, and eventually the packet is delivered to Service Z. This pattern makes maintaining and configuring these proxies easier by separating the lifecycle of the proxies from the microservices running in the same cluster.

## New protocols, same infrastructure

Back in 2017, when we were looking at improving our service-to-service communications with gRPC, [Linkerd supported HTTP/2 and gRPC out of the box](https://buoyant.io/2017/01/10/http2-grpc-and-linkerd/), making it easier to migrate to a service mesh using Linkerd.

In addition, the ability to use both HTTP and HTTP/2 (gRPC) for any microservice, and the need to support multiple protocols at the same time, meant that multi-protocol support had become a hard requirement when choosing a proxy for our infrastructure.

{{< fig
  alt="http and grpc together"
  src="https://wecode.wepay.com/assets/2018-06-11-using-l5d-as-a-service-mesh-proxy-at-wepay/image_4.png"
  title="Figure 5: The proxy accepts and forwards both gRPC and HTTP" >}}

This diagram shows how some requests use HTTP while others use HTTP/2. Being able to use multiple protocols with the same infrastructure configuration proved to be a critical feature when we planned our migration from HTTP to HTTP/2 (gRPC). During a migration, we have some services talking to each other over HTTP, while others are communicating over HTTP/2. Figure 5 imagines the infrastructure as the rollout happens over time. In a future post we will dive deeper into how our microservices send and receive different types of payloads in our infrastructure, e.g. REST, Protobufs, etc.

Today, most service mesh proxies, including Envoy, handle the latest protocols like HTTP, HTTP/2, and others.

## I can haz metrics

In our infrastructure we make use of [Prometheus](https://prometheus.io/) to monitor Kubernetes, microservices, and other internal services.
[Envoy requires an extra step](https://www.datawire.io/faster/ambassador-prometheus/) to make use of Prometheus, but with the ready-to-use [Prometheus telemetry plugin](https://linkerd.io/administration/telemetry/) from Linkerd, it was easier for us to get up and running with graphs without the need for extra services gluing service mesh proxies to our visualization dashboard: - -{{< fig - alt="proxy metrics" - src="https://wecode.wepay.com/assets/2018-06-11-using-l5d-as-a-service-mesh-proxy-at-wepay/image_5.png" - title="Proxy metrics" >}} - -{{< fig - alt="service metrics" - src="https://wecode.wepay.com/assets/2018-06-11-using-l5d-as-a-service-mesh-proxy-at-wepay/image_6.png" - title="Figure 6: Cluster and application level view of proxy metrics" >}} - -The sample dashboards in figure 6 show the global, per-microservice, and per-proxy traffic in one place for better visibility into what’s going through the infrastructure, in a DaemonSet proxy pattern. - -One of the other convenient parts of using Linkerd is the range of metrics the proxy comes with out of the box. In addition, Linkerd also makes it easier to write custom plugins to control, for example, the retry mechanism using those custom metrics. So any specific metrics, alerting, and monitoring can be retro fitted to meet the need of the infrastructure that’s running service mesh. - -## Crank it up, security that is - -Most proxies nowadays support various proxy to proxy encryption and authorization methods, and with Linkerd we have the ability to go even further when used with the sidecar pattern. Using the sidecar pattern, we’re able to use the per-service authorization in Linkerd, which gives us the ability to maximize infrastructure security, where and when applicable. - -One thing that does work differently in an environment setup with the sidecar proxy pattern is per-service TLS certificates for SSL handshakes. - -{{< fig - alt="security service specific certs" - src="https://wecode.wepay.com/assets/2018-06-11-using-l5d-as-a-service-mesh-proxy-at-wepay/image_7.png" - title="Figure 7: Per-service TLS certificates used for SSL handshakes" >}} - -Figure 7 shows a Linkerd proxy for Service Z using Service X’s certificates when sending a request to Service X, and using Service Y’s certificates when sending a request to Service Y. This gives us the ability to maintain, update, and change SSL certificates for each service independent of each other, and also increase the security of our microservices. - -This feature can be useful for some setups, but quite overkill for some others, so having the ability to choose one over the other is a nice feature. - -## Conclusion - -Based on the infrastructure requirements and improvements in mind, we decided to pick Linkerd for our technology stack. - -Using Linkerd, we can have the reliability that we need, introduce new protocols to our infrastructure for our microservices to use, have more visibility into our service traffic, and tweak security as we see fit. - -In the upcoming blogs in this series, we’re going to talk about different parts of a service mesh architecture, and how they apply to WePay’s architecture. 
- - - -[envoy]: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/load_balancing -[round-robin]: https://buoyant.io/2016/03/16/beyond-round-robin-load-balancing-for-latency/ -[perf]: https://blog.buoyant.io/2017/01/31/making-things-faster-by-adding-more-steps/ -[tuning]: https://blog.buoyant.io/2017/01/31/making-things-faster-by-adding-more-steps/ diff --git a/linkerd.io/content/blog/using-linkerd-kubernetes-rbac.md b/linkerd.io/content/blog/using-linkerd-kubernetes-rbac.md deleted file mode 100644 index f62b5a97c6..0000000000 --- a/linkerd.io/content/blog/using-linkerd-kubernetes-rbac.md +++ /dev/null @@ -1,191 +0,0 @@ ---- -slug: 'using-linkerd-kubernetes-rbac' -title: 'Using Linkerd with Kubernetes RBAC' -aliases: - - /2017/07/24/using-linkerd-kubernetes-rbac/ -author: 'risha' -date: Mon, 24 Jul 2017 22:09:31 +0000 -draft: false -featured: false -thumbnail: /uploads/linkerd_featured.png -tags: [Linkerd, linkerd, News, tutorials] ---- - -If you're running Kubernetes 1.6 or later, you can optionally make use of Kubernetes' new support for [RBAC (role-based access control)](https://kubernetes.io/blog/2017/04/rbac-support-in-kubernetes/), which allows you to restrict who can [access the Kubernetes API](https://kubernetes.io/docs/admin/accessing-the-api/) on the cluster and what they can do with it. However, when upgrading to an RBAC-enabled cluster you can run into issues, as many Kubernetes examples do not take into account the fact that certain API calls may be restricted. - -In this post, we’ll show you how to use [Linkerd](https://linkerd.io), our open source _service mesh_ for cloud-native applications, with RBAC-enabled Kubernetes clusters. - -## What is RBAC? - -First, it's helpful to understand what RBAC actually does. RBAC works by defining a _role_ that describes a set of permissions, and by then assigning that role to relevant users/service accounts. In Kubernetes RBAC, these roles restrict which Kubernetes verbs can be used (e.g. `get`, `list`, `create`), and which Kubernetes resources they can be applied to (e.g. `pods`, `services`). So, for example, we can create a `Role` (called, for example, “read-only”) that only allows `get` and `watch` on pod resources. And we can then create `RoleBinding`s to assign this “read-only” role to whichever “subjects” need them, e.g. the “qa-bot” service account. - -In order for Linkerd to operate in an RBAC-enabled cluster, we need to make sure that the types of access that Linkerd needs to the Kubernetes APIs are allowed. Below, we'll walk through how to do this. If you just want the completed config, you can skip to the bottom—or just use [linkerd-rbac-beta.yml][linkerd-rbac] (stored in our [linkerd-examples][linkerd-example] repo). - -We'll be setting up the permission by creating a `ClusterRole` and a `ClusterRoleBinding`, illustrated below. - -{{< fig - alt="RBAC" - title="Configuration" - src="/uploads/2018/05/blog_rbac_configuration.png" >}} - -## Granting Linkerd access to an RBAC Kubernetes Cluster - -When used with a Kubernetes cluster, Linkerd uses its `io.l5d.k8s` “namer” to do service discovery against the Kubernetes API. (Of course, this namer can be used in conjunction with other service discovery mechanisms, allowing Linkerd to bridge Kubernetes and non-Kubernetes systems—but that's a later blog post). - -Linkerd only requires read access, and only needs access to access the `services` and `endpoints` Kubernetes resources. 
We can capture this access via the following Kubernetes config: - -```yml ---- -# grant linkerd/namerd permissions to enable service discovery -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1beta1 -metadata: - name: linkerd-endpoints-reader -rules: - - apiGroups: [""] # "" indicates the core API group - resources: ["endpoints", "services", "pods"] # pod access is required for the *-legacy.yml examples in linkerd-examples - verbs: ["get", "watch", "list"] -``` - -For simplicity’s sake, at this point we could just assign this role to the `default` service account (which is the account Kubernetes assigns to you when you [create a pod](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/) if you don’t specify one): - -```yml ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1beta1 -metadata: - name: linkerd-role-binding -subjects: - - kind: ServiceAccount - name: default # change this to your service account if you’ve specified one - namespace: default -roleRef: - kind: ClusterRole - name: linkerd-endpoints-reader - apiGroup: rbac.authorization.k8s.io -``` - -Linkerd now has the access it needs to function in a Kubernetes environment. In production, however, you might want to use a dedicated service account—[see below](#running-linkerd-with-a-specified-service-account). - -### Namerd - -If you’re using [Namerd](https://github.com/linkerd/linkerd/blob/master/namerd/README.md) as a control plane to dynamically change routing configuration across all Linkerd instances ([see here](https://buoyant.io/2016/11/04/a-service-mesh-for-kubernetes-part-iv-continuous-deployment-via-traffic-shifting/) for why you might want to do this), you’ll need some additional permissions. Namerd needs access to a Kubernetes `ThirdPartyResource` to store its routing rules ("dtabs"). In our example namerd.yml, we’ve added this resource as `d-tab.l5d.io`. We can allow Namerd read and write access to this resource using the following role: - -```yml ---- -# grant namerd permissions to third party resources for dtab storage -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1beta1 -metadata: - name: namerd-dtab-storage -rules: - - apiGroups: ["l5d.io"] - resources: ["dtabs"] - verbs: ["get", "watch", "list", "update", "create"] -``` - -Similar to above, we’ll assign the role to the `default` service account with a role binding: - -```yml ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1beta1 -metadata: - name: namerd-role-binding -subjects: - - kind: ServiceAccount - name: default - namespace: default -roleRef: - kind: ClusterRole - name: namerd-dtab-storage - apiGroup: rbac.authorization.k8s.io -``` - -### Running Linkerd with a specified Service Account - -In the previous sections, we used the default service account to run Linkerd. For some use cases, however, you may want to create a dedicated service account and assign the permissions to that account. You’ll also want to consider whether you want roles to be cluster-scoped (`ClusterRoleBinding`) or namespace-scoped (`RoleBinding`). Let’s go through how to configure permissions for a specific service account, `linkerd-svc-account`, starting from the [linkerd.yml][daemonset] config in linkerd-examples. We’ll add a `ServiceAccount` config, and assign that service account to the pod. 
Here’s part of the file, with `linkerd-svc-account` added: - -```yml ---- -apiVersion: extensions/v1beta1 -kind: DaemonSet -metadata: - labels: - app: l5d - name: l5d -spec: - template: - metadata: - labels: - app: l5d - spec: - volumes: - - name: l5d-config - configMap: - name: 'l5d-config' - serviceAccount: linkerd-svc-account - containers: - - name: l5d - image: buoyantio/linkerd:1.1.2 - env: - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - args: - - /io.buoyant/linkerd/config/config.yaml - ports: - - name: outgoing - containerPort: 4140 - hostPort: 4140 - - name: incoming - containerPort: 4141 - - name: admin - containerPort: 9990 - volumeMounts: - - name: 'l5d-config' - mountPath: '/io.buoyant/linkerd/config' - readOnly: true - - - name: kubectl - image: buoyantio/kubectl:v1.4.0 - args: - - 'proxy' - - '-p' - - '8001' ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: linkerd-svc-account -``` - -Then we’ll change the subject in your linkerd-rbac-beta.yml to reference this new service account: - -```yml ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1beta1 -metadata: - name: linkerd-role-binding -subjects: - - kind: ServiceAccount - name: linkerd-svc-account - namespace: default -roleRef: - kind: ClusterRole - name: linkerd-endpoints-reader - apiGroup: rbac.authorization.k8s.io -``` - -And that’s it! The Linkerd pods now use the `linkerd-svc-account` and have the right permissions. - -## Putting it all together - -For a complete Kubernetes config file that uses all of the above, just use this file: [linkerd-rbac.yml][linkerd-rbac]. This config will allow Linkerd and Namerd to have all the access needed to the Kubernetes API with the default service account. If you'd like to set this up using a dedicated service account, you'll need to modify linkerd-rbac-beta.yml, as described in the previous section. We hope this post was useful. We’d love to get your thoughts. Please join us in the [Linkerd Support Forum](https://linkerd.buoyant.io/) and the Linkerd [Slack](https://slack.linkerd.io/) channel! And for more walkthroughs of how to use [Linkerd’s various features](https://linkerd.io/features/index.html) on Kubernetes, see our [Service Mesh For Kubernetes]({{< ref -"a-service-mesh-for-kubernetes-part-i-top-line-service-metrics" >}}) blog series. - -[daemonset]: https://raw.githubusercontent.com/linkerd/linkerd-examples/master/k8s-daemonset/k8s/linkerd.yml -[linkerd-rbac]: https://github.com/linkerd/linkerd-examples/blob/master/k8s-daemonset/k8s/linkerd-rbac.yml -[linkerd-example]: https://github.com/linkerd/linkerd-examples/tree/master/k8s-daemonset diff --git a/linkerd.io/content/blog/webinar-recap-deep-dive-conduits-rust-based-data-plane.md b/linkerd.io/content/blog/webinar-recap-deep-dive-conduits-rust-based-data-plane.md deleted file mode 100644 index ab42fd5802..0000000000 --- a/linkerd.io/content/blog/webinar-recap-deep-dive-conduits-rust-based-data-plane.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -slug: 'webinar-recap-deep-dive-conduits-rust-based-data-plane' -title: 'Webinar recap: A deep dive into Conduit’s Rust-based data plane' -aliases: - - /2018/02/05/webinar-recap-deep-dive-conduits-rust-based-data-plane/ -author: 'courtney' -date: Mon, 05 Feb 2018 19:16:20 +0000 -draft: false -featured: false -thumbnail: /uploads/conduit_webinar_recap.png -tags: - [ - conduit, - Conduit, - Linkerd, - Release Notes, - rust, - rustlang, - service mesh, - webinar, - Webinars, - ] ---- - -Conduit is now part of Linkerd! 
[Read more >]({{< relref -"conduit-0-5-and-the-future" >}}) - -Earlier this month, Conduit core contributors and Rust enthusiasts Carl Lerche and Sean McArthur presented a look at the internals of the new Conduit service mesh, explored its fundamental design choices, and covered our motivations for writing the Conduit data plane in Rust. In case you missed it, we have some of the key takeaways below. - -## Why do we need Conduit in a world with Linkerd? - -Buoyant sponsors two open source service mesh projects: [Linkerd](https://linkerd.io) and [Conduit](https://conduit.io). Linkerd is a battle tested, production grade, multi-platform, and feature-rich service mesh that is nearly two years mature. Built on components like Scala and the JVM, it's very adept at scaling up for high end performance use cases that can handle tens of thousands of requests per second per host. - -While Linkerd is great at scaling up, its fundamental components also prevent it from scaling down. New emergent deployment patterns for microservices mean that they typically operate at significantly smaller scale than what Linkerd is best suited for. In those scenarios, a more appropriate solution is necessary. So in December, we introduced Conduit. - -Conduit is a radically new take on the service mesh with a very different fundamental design philosophy. Conduit focuses on being small, ultralight, performant, secure, and simple. It reduces complexity by having very few moving parts and requiring very little configuration. In order to achieve all of those goals, Conduit developers had to make very specific architectural choices like those covered in this webinar. - -## Memory safety guarantees with Rust - -The proxying layer of a service mesh (aka, the data plane) has very strict performance and safety requirements. It gets injected into the communication layer between all of your services and every single byte that is sent or received over the network gets routed through it. In production environments, protecting sensitive data is a paramount concern as well as as regulatory one (HIPPA, PII, etc). The data plane must be fundamentally secure. At the same time, it’s also critical to limit the performance impact incurred when introducing that additional management layer. You want manageability and security, but not at the cost of performance. When it comes to production-grade performance, what matters most is ensuring*predictable* performance, with very low latency variance. - -Modern programming languages either include a runtime (e.g. Go or Java) or they don’t (e.g. C/C++). The use of runtime abstracts a lot of low-level management, but that overhead incurs a significant performance hit that makes it unsuitable for use in the data plane. Foregoing a runtime gets in range of the performance requirements necessary in that layer, but that means taking responsibility for low-level tasks like memory management and introducing new risk by exposure to buffer overflow attack vectors. Historically, this has been the tradeoff every developer faces when choosing between safety and speed. - -In order to provide both speed and safety, the Conduit team opted to use [Rust](https://www.rust-lang.org/) to develop the data plane. Rust is a relatively new language that doesn't require a runtime. It guarantees memory safety to prevent buffer overflow attacks while also compiling down to native code to ensure predictable high end performance. 
In the webinar, we cover the particulars of how Rust makes these guarantees, as well as which Rust components are used in Conduit, what they do, and how you can contribute and get involved. - -You don’t need to learn Rust to use Conduit. Simply install and run it like any other piece of software you use. In fact, you probably don’t need to learn Rust to make contributions to Conduit either. While the Conduit data plane is written in Rust, the control plane is written in Go--a language commonly used in microservice management projects. For more specifics on all of these topics and more, check out the webinar below. - -{{< youtube ig-I1641Gdk >}} - -## More information - -If you haven’t already tried [Conduit](http://conduit.io), follow the [getting started](https://conduit.io/getting-started/) guide. Check out the source on [Github](https://github.com/runconduit/conduit) and star the project if you like what we’re doing. If you have questions, come join us on the #conduit channel in the [Linkerd Slack group](http://linkerd.slack.com). diff --git a/linkerd.io/content/blog/webinar-recap-navigating-the-service-mesh-ecosystem.md b/linkerd.io/content/blog/webinar-recap-navigating-the-service-mesh-ecosystem.md deleted file mode 100644 index d8c7dc82a8..0000000000 --- a/linkerd.io/content/blog/webinar-recap-navigating-the-service-mesh-ecosystem.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -slug: 'webinar-recap-navigating-the-service-mesh-ecosystem' -title: 'Webinar recap: Navigating the service mesh ecosystem' -aliases: - - /2018/03/26/webinar-recap-navigating-the-service-mesh-ecosystem/ -author: 'gmiranda23' -date: Mon, 26 Mar 2018 17:30:38 +0000 -draft: false -featured: false -thumbnail: /uploads/navigating_the_ecosystem.png -tags: [Conduit, Uncategorized] ---- - -Conduit is now part of Linkerd! [Read more >]({{< relref -"conduit-0-5-and-the-future" >}}) - -Earlier this month, Christian Posta (Red Hat) joined me to present a joint webinar looking at the various open-source service mesh projects (Linkerd, Envoy, Istio, and Conduit) to help users make sense of where to start and how to navigate the many options available to them. Check out the webinar below for the full length session along with Q&A. - -The webinar has tips for both beginners and advanced users. We begin with a brief high-level overview of common service mesh architecture and explore the types of considerations that teams should be thinking about when evaluating different solutions, both from a technical and organizational perspective. - -{{< youtube X8CBGsTLuHU >}} - -Below are some highlights. We hope you’ll watch the recording, and please join us on the individual project Slack channels if you have more specific questions ([Linkerd](https://linkerd.slack.com), [Envoy](https://envoyslack.cncf.io/), [Istio](https://istio.slack.com/), and #conduit on [Linkerd Slack](https://slack.linkerd.io/)). You can also reach either [Christian](https://twitter.com/christianposta) or [me](https://twitter.com/gmiranda23) directly via Twitter, the [CNCF Slack group](https://cloud-native.slack.com/), or the [Kubernetes Slack group](https://kubernetes.slack.com). We’d love to hear about your journey into navigating the service mesh ecosystem and how we can help get you started. - -## Service mesh architecture - -Aside from [basics about a service mesh](https://buoyant.io/2017/04/25/whats-a-service-mesh-and-why-do-i-need-one/), we discussed how the shift to microservices introduces a new class of communication into your infrastructure. 
In microservice-based architectures, service-to-service communication suddenly becomes the primary fundamental factor that determines how your applications will behave at runtime. - -Complicating runtime behavior, the shift to microservices also means you begin to see a sprawl of ownership and workflows. Services typically shift to being owned by different teams, with different schedules, and often conflicting priorities. Understanding the relationships between the many interdependent services supporting your mission critical apps can easily become impossible. The service mesh exists to solve these operational runtime challenges. - -{{< fig alt="Basic service mesh architecture" -src="/uploads/2018/03/Screen-Shot-2018-03-20-at-3.43.11-PM-300x168.png" -title="Basic service mesh architecture" >}} - -In this basic architectural diagram the green boxes in the data plane represent apps, the blue squares are service mesh proxies, and the rectangles are app endpoints (a pod, a physical host, etc). The service mesh should also provide a control plane where you, as an operator, compose policy that alters behavior in the data plane. The service mesh manages all service requests (e.g. messages) with an inherent understanding that makes it application aware. It provides capabilities like retries, timeouts, and circuit breaking to improve overall resiliency. - -## A new way to solve old problems - -The service mesh isn’t just limited to managing service requests. As a proxy layer it can, and should, manage all network traffic. Because the value proposition behind a service mesh is particularly strong when it comes to managing service requests (e.g. messages), we sometimes see frequent comparisons to things like messaging-oriented middleware, an Enterprise Service Bus (ESB), Enterprise Application Integration patterns (EAI), API Gateways, or resilience libraries like Netflix’s Hystrix or Twitter’s Finagle. - -The service mesh is different because it lives as a dedicated infrastructure layer that is decoupled and managed separately from your applications. The service mesh relieves developers from having to implement solutions that are tightly coupled to your application business logic. - -## Questions should you be asking - -Learning about and implementing a new solution always comes at a cost measured in cognitive burden and time. With many service mesh solutions existing today, it helps to clearly understand your own needs before making the investment in due diligence to try out tools that could be right for your environment. To better understand how your needs align with what different service mesh options provide, we covered a list of technical and operational questions to help you pinpoint a starting point on your service mesh journey. - -- Am I ready for a service mesh? Is your organization? -- What problems am I having today? Are you experiencing pain today or simply preparing for what you think you might need? -- What platforms does my team need to support? -- What level of observability do your services have today? Where are the current gaps in logging, tracing, etc? -- What functionalities of a service mesh do you already have? Can you introduce a service mesh safely and how will it interact with the features you’ve already built? -- What is the division of responsibility in your organization and will the product you’re considering allow you to work in ways that support that structure? -- Does your team favor centralized or decentralized functionality? -- What support needs does your team have? 
- -## A service mesh landscape - -The webinar then dives deep into each of the existing open-source solutions on the market today (Linkerd, Envoy, Istio, and Conduit) to examine the different use cases for which each is best suited. Whether you have a complex architecture that needs to support a number of third-party integrations, you want something easy to use and simple to understand, or if you need a large all-encompassing framework, there is a service mesh solution that could be right for you. - -We encourage you to listen to this portion of the webinar because it’s more than we can simply summarize in this post. As always, please reach out if you’d like to dive deeper into your particular situation, and stay tuned for more webinars like this in the future. diff --git a/linkerd.io/content/blog/whats-a-service-mesh-and-why-do-i-need-one.md b/linkerd.io/content/blog/whats-a-service-mesh-and-why-do-i-need-one.md deleted file mode 100644 index bb6da2a7d5..0000000000 --- a/linkerd.io/content/blog/whats-a-service-mesh-and-why-do-i-need-one.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -slug: 'whats-a-service-mesh-and-why-do-i-need-one' -title: "What's a service mesh? And why do I need one?" -aliases: - - /2017/04/25/whats-a-service-mesh-and-why-do-i-need-one/ -author: 'william' -date: Tue, 25 Apr 2017 23:35:29 +0000 -draft: false -featured: false -thumbnail: /uploads/service_mesh_featured.png -tags: - [ - Buoyant, - buoyant, - cloud-native, - conduit, - Conduit, - Industry Perspectives, - Linkerd, - linkerd, - microservices, - service mesh, - ] ---- - -**Edit 11/26/2018: Since we wrote this post, lots has happened. Most excitingly, [Linkerd 2.0 has been released]({{< ref "announcing-linkerd-2-0" >}})!** - -tl;dr: A service mesh is a dedicated infrastructure layer for making service-to-service communication safe, fast, and reliable. If you’re building a cloud native application, you need a service mesh. - -Over the past year, the service mesh has emerged as a critical component of the cloud native stack. High-traffic companies like [Paypal, Ticketmaster, and Credit Karma have all added a service mesh to their production applications]({{< ref "announcing-linkerd-1-0" >}}), and this January, [Linkerd](http://linkerd.io), the open source service mesh for cloud native applications, [became an official project of the Cloud Native Computing Foundation](https://techcrunch.com/2017/01/23/cloud-native-computing-foundation-adds-linkerd-as-its-fifth-hosted-project/). But what is a service mesh, exactly? And why is it suddenly relevant? - -In this article, I’ll define the service mesh and trace its lineage through shifts in application architecture over the past decade. I’ll distinguish the service mesh from the related, but distinct, concepts of API gateways, edge proxies, and the enterprise service bus. Finally, I’ll describe where the service mesh is heading, and what to expect as this concept evolves alongside cloud native adoption. - -## WHAT IS A SERVICE MESH? - -A service mesh is a dedicated infrastructure layer for handling service-to-service communication. It’s responsible for the reliable delivery of requests through the complex topology of services that comprise a modern, cloud native application. In practice, the service mesh is typically implemented as an array of lightweight network proxies that are deployed alongside application code, without the application needing to be aware. (But there are variations to this idea, as we’ll see.) 
- -The concept of the service mesh as a separate layer is tied to the rise of the cloud native application. In the cloud native model, a single application might consist of hundreds of services; each service might have thousands of instances; and each of those instances might be in a constantly-changing state as they are dynamically scheduled by an orchestrator like Kubernetes. Not only is service communication in this world incredibly complex, it’s a pervasive and fundamental part of runtime behavior. Managing it is vital to ensuring end-to-end performance and reliability. - -## IS THE SERVICE MESH A NETWORKING MODEL? - -The service mesh is a networking model that sits at a layer of abstraction above TCP/IP. It assumes that the underlying L3/L4 network is present and capable of delivering bytes from point to point. (It also assumes that this network, as with every other aspect of the environment, is unreliable; the service mesh must therefore also be capable of handling network failures.) - -In some ways, the service mesh is analogous to TCP/IP. Just as the TCP stack abstracts the mechanics of reliably delivering bytes between network endpoints, the service mesh abstracts the mechanics of reliably delivering requests between services. Like TCP, the service mesh doesn’t care about the actual payload or how it’s encoded. The application has a high-level goal (“send something from A to B”), and the job of the service mesh, like that of TCP, is to accomplish this goal while handling any failures along the way. - -Unlike TCP, the service mesh has a significant goal beyond “just make it work”: it provides a uniform, application-wide point for introducing visibility and control into the application runtime. The explicit goal of the service mesh is to move service communication out of the realm of the invisible, implied infrastructure, and into the role of a *first-class member of the ecosystem*—where it can be monitored, managed and controlled. - -## WHAT DOES A SERVICE MESH ACTUALLY DO? - -Reliably delivering requests in a cloud native application can be incredibly complex. A service mesh like [Linkerd](https://linkerd.io/) manages this complexity with a wide array of powerful techniques: circuit-breaking, latency-aware load balancing, eventually consistent (“advisory”) service discovery, retries, and deadlines. These features must all work in conjunction, and the interactions between these features and the complex environment in which they operate can be quite subtle. - -For example, when a request is made to a service through Linkerd, a very simplified timeline of events is as follows: - -1. Linkerd applies dynamic routing rules to determine which service the requester intended. Should the request be routed to a service in production or in staging? To a service in a local datacenter or one in the cloud? To the most recent version of a service that’s being tested or to an older one that’s been vetted in production? All of these routing rules are dynamically configurable, and can be applied both globally and for arbitrary slices of traffic. -2. Having found the correct destination, Linkerd retrieves the corresponding pool of instances from the relevant service discovery endpoint, of which there may be several. If this information diverges from what Linkerd has observed in practice, Linkerd makes a decision about which source of information to trust. -3. 
Linkerd chooses the instance most likely to return a fast response based on a variety of factors, including its observed latency for recent requests. -4. Linkerd attempts to send the request to the instance, recording the latency and response type of the result. -5. If the instance is down, unresponsive, or fails to process the request, Linkerd retries the request on another instance (but only if it knows the request is idempotent). -6. If an instance is consistently returning errors, Linkerd evicts it from the load balancing pool, to be periodically retried later (for example, an instance may be undergoing a transient failure). -7. If the deadline for the request has elapsed, Linkerd proactively fails the request rather than adding load with further retries. -8. Linkerd captures every aspect of the above behavior in the form of metrics and distributed tracing, which are emitted to a centralized metrics system. - -And that’s just the simplified version–Linkerd can also initiate and terminate TLS, perform protocol upgrades, dynamically shift traffic, and fail over between datacenters! - -{{< fig - alt="mesh" - title="Mesh" - src="/uploads/2017/04/linkerd-service-mesh-diagram-1024x587.png" >}} - -It’s important to note that these features are intended to provide both pointwise resilience and application-wide resilience. Large-scale distributed systems, no matter how they’re architected, have one defining characteristic: they provide many opportunities for small, localized failures to escalate into system-wide catastrophic failures. The service mesh must be designed to safeguard against these escalations by shedding load and failing fast when the underlying systems approach their limits. - -## WHY IS THE SERVICE MESH NECESSARY? - -The service mesh is ultimately not an introduction of new functionality, but rather a shift in where functionality is located. Web applications have always had to manage the complexity of service communication. The origins of the service mesh model can be traced in the evolution of these applications over the past decade and a half. - -Consider the typical architecture of a medium-sized web application in the 2000’s: the three-tiered app. In this model, application logic, web serving logic, and storage logic are each a separate layer. The communication between layers, while complex, is limited in scope—there are only two hops, after all. There is no “mesh”, but there is communication logic between hops, handled within the code of each layer. - -When this architectural approach was pushed to very high scale, it started to break. Companies like Google, Netflix, and Twitter, faced with massive traffic requirements, implemented what was effectively a predecessor of the cloud native approach: the application layer was split into many services (sometimes called “microservices”), and the tiers became a topology. In these systems, a generalized communication layer became suddenly relevant, but typically took the form of a “fat client” library—Twitter’s [Finagle](https://twitter.github.io/finagle/), Netflix’s [Hystrix](https://github.com/Netflix/Hystrix), and Google’s Stubby being cases in point. - -In many ways, libraries like Finagle, Stubby, and Hystrix were the first service meshes. 
While they were specific to the details of their surrounding environment, and required the use of specific languages and frameworks, they were forms of dedicated infrastructure for managing service-to-service communication, and (in the case of the open source Finagle and Hystrix libraries) found use outside of their origin companies. - -Fast forward to the modern cloud native application. The cloud native model combines the microservices approach of many small services with two additional factors: containers (e.g. [Docker](https://docker.com/)), which provide resource isolation and dependency management, and an orchestration layer (e.g. [Kubernetes](http://kubernetes.io/)), which abstracts away the underlying hardware into a homogeneous pool. - -These three components allow applications to adapt with natural mechanisms for scaling under load and for handling the ever-present partial failures of the cloud environment. But with hundreds of services or thousands of instances, and an orchestration layer that’s rescheduling instances from moment to moment, the path that a single request follows through the service topology can be incredibly complex, and since containers make it easy for each service to be written in a different language, the library approach is no longer feasible. - -This combination of complexity and criticality motivates the need for a dedicated layer for service-to-service communication decoupled from application code and able to capture the highly dynamic nature of the underlying environment. This layer is the service mesh. - -## THE FUTURE OF THE SERVICE MESH - -While service mesh adoption in the cloud native ecosystem is growing rapidly, there is an extensive and exciting roadmap ahead still to be explored. The requirements for serverless computing (e.g. Amazon’s [Lambda](https://aws.amazon.com/lambda/)) fit directly into the service mesh’s model of naming and linking, and form a natural extension of its role in the cloud native ecosystem. The roles of service identity and access policy are still very nascent in cloud native environments, and the service mesh is well poised to play a fundamental part of the story here. Finally, the service mesh, like TCP/IP before it, will continue to be pushed further into the underlying infrastructure. Just as Linkerd evolved from systems like Finagle, the current incarnation of the service mesh as a separate, user-space proxy that must be explicitly added to a cloud native stack will also continue to evolve. - -## CONCLUSION - -The service mesh is a critical component of the cloud native stack. A little more than one year from its launch, Linkerd is part of the Cloud Native Computing Foundation and has a thriving community of contributors and users. Adopters range from startups like Monzo, which is disrupting the UK banking industry, to high scale Internet companies like Paypal, Ticketmaster, and Credit Karma, to companies that have been in business for hundreds of years like Houghton Mifflin Harcourt. - -The Linkerd open source community of adopters and contributors are demonstrating the value of the service mesh model every day. We’re committed to building an amazing product and continuing to grow our incredible community. [Join us](https://linkerd.io/)! 
diff --git a/linkerd.io/content/dashboard/20230202-linkerd-day-eu.md b/linkerd.io/content/dashboard/20230202-linkerd-day-eu.md deleted file mode 100644 index 4d7f147e75..0000000000 --- a/linkerd.io/content/dashboard/20230202-linkerd-day-eu.md +++ /dev/null @@ -1,17 +0,0 @@ -+++ -date = 2023-02-02T00:00:00Z -title = "Linkerd Day at KubeCon EU 2023" -+++ - -The Linkerd maintainers are thrilled to announce the first ever [Linkerd Day], -happening in Amsterdam on 18 April, 2023, at KubeCon/CloudNativeCon! - -[Linkerd Day] is a practitioner-driven community conference, emphasizing -end-user case studies and technical talks. Interested in [speaking]? The CFP -is open until 12 February, and we're available to help with proposals if -needed! Just ask in the `#workshops` channel, or DM `@Flynn` or `@Catherine`, -in the [Linkerd Slack]! - -[Linkerd Day]: https://events.linuxfoundation.org/kubecon-cloudnativecon-europe/cncf-hosted-co-located-events/linkerd-day/ -[speaking]: https://events.linuxfoundation.org/kubecon-cloudnativecon-europe/cncf-hosted-co-located-events/linkerd-day/ -[Linkerd Slack]: https://slack.linkerd.io diff --git a/linkerd.io/content/dashboard/20230713-linkerd-forum.md b/linkerd.io/content/dashboard/20230713-linkerd-forum.md new file mode 100644 index 0000000000..897039a223 --- /dev/null +++ b/linkerd.io/content/dashboard/20230713-linkerd-forum.md @@ -0,0 +1,14 @@ ++++ +date = 2023-07-13T00:00:00Z +title = "Linkerd Support Forum is Live!" ++++ + +Looking for a new way to get help from the Linkerd maintainers and community +members? Sad that messages disappear on Slack? GitHub discussions too galling +for you? + +We're pleased to announce the [Linkerd Forum] to help with all these issues! +It's a place where you can post questions, get help, and search the entire +existing conversation. We hope to see you there! + +[Linkerd Forum]: https://linkerd.buoyant.io/ diff --git a/linkerd.io/content/faq/_index.md b/linkerd.io/content/faq/_index.md index 8d80f7a6cf..6ffb023012 100644 --- a/linkerd.io/content/faq/_index.md +++ b/linkerd.io/content/faq/_index.md @@ -1,5 +1,8 @@ --- title: Frequently Asked Questions +description: What is Linkerd? What’s the difference between Linkerd and + Istio? Why doesn’t Linkerd use Envoy? Get answers to these questions + and more. type: faq include_toc: true enableFAQSchema: true @@ -29,7 +32,7 @@ faqs: security audits](https://github.com/linkerd/linkerd2/blob/main/SECURITY_AUDIT.pdf). Finally, Linkerd has publicly [committed to open - governance](https://linkerd.io/2019/10/03/linkerds-commitment-to-open-governance/) + governance](/2019/10/03/linkerds-commitment-to-open-governance/) and is hosted by [the CNCF](https://cncf.io). answer_schema: Linkerd is significantly lighter and simpler than Istio. Linkerd is built @@ -62,7 +65,7 @@ faqs: The choice of Rust also allows Linkerd to avoid a whole class of CVEs and vulnerabilities that can impact proxies written in non-memory-safe languages like C++. See [Why Linkerd doesn't use - Envoy](https://linkerd.io/2020/12/03/why-linkerd-doesnt-use-envoy/) for + Envoy](/2020/12/03/why-linkerd-doesnt-use-envoy/) for more. answer_schema: Envoy is a complex general-purpose proxy. Linkerd uses a simple and @@ -73,7 +76,7 @@ faqs: The choice of Rust also allows Linkerd to avoid a whole class of CVEs and vulnerabilities that can impact proxies written in non-memory-safe languages like C++. 
See [Why Linkerd doesn't use - Envoy](https://linkerd.io/2020/12/03/why-linkerd-doesnt-use-envoy/) for + Envoy](/2020/12/03/why-linkerd-doesnt-use-envoy/) for more. - question: Who owns Linkerd and how is it licensed? answer: @@ -90,7 +93,7 @@ faqs: - question: Who is Linkerd for? answer: Linkerd is for everyone. (See [Linkerd's Commitment to Open - Governance](https://linkerd.io/2019/10/03/linkerds-commitment-to-open-governance/).) + Governance](/2019/10/03/linkerds-commitment-to-open-governance/).) In practice, Linkerd has certain technical prerequisites, such as Kubernetes. answer_schema: @@ -105,14 +108,16 @@ faqs: answer: 'Just like this: Linkerd. Capital "L", lower-case everything else.' answer_schema: 'Just like this: Linkerd. Capital "L", lower-case everything else.' - - question: Is there a Linkerd "enterprise edition"? + - question: Is there a Linkerd enterprise edition? answer: - No. Linkerd is fully open source with everything you need to run it in - production as part of the open source project. + Yes, enterprise distributions of Linkerd are available from Buoyant + (creators of Linkerd) as well as other companies. See the list of + companies that provide [commercial distributions of + Linkerd](/enterprise/). - question: Can I get commercial support? answer: Yes. See the list of companies that provide [commercial support for - Linkerd](https://linkerd.io/enterprise/). + Linkerd](/enterprise/). - question: What's on the Linkerd roadmap? answer: See the [Linkerd project diff --git a/linkerd.io/content/heroes/_index.md b/linkerd.io/content/heroes/_index.md index 157e11c59c..ba51c4383c 100644 --- a/linkerd.io/content/heroes/_index.md +++ b/linkerd.io/content/heroes/_index.md @@ -3,19 +3,30 @@ title: Heroes layout: heroes url: /community/heroes/ aliases: ["/heroes/"] +description: Open source is all about community, and Linkerd wouldn’t be what it + is today without our heroes. Linkerd Heroes contribute to the codebase, help + each other out on Slack and social media, and share their successes and failures + at conferences or through blogs. top_hero: title: Linkerd Heroes description: Open source is all about community, and Linkerd wouldn't be what it is today without our heroes. Linkerd Heroes contribute to the codebase, help each - other out whether on Slack or social media, share their successes and failures - at conferences or through blogs, fostering the dynamic and engaging community - we all love. + other out on Slack, social media, and in person, and share their successes and + failures at conferences or through blogs, fostering the dynamic and engaging + community we all love. image: "/images/heroes-graphic.svg" image_on_the_right: false alt: Portrait of Rio Kierkels voted: badge: "/uploads/credly-badges_voted.svg" heroes: + - name: Mikael Fridh + date: July 2023 + blurb: Technical contributions + hero_type: contributor + image: "/uploads/2023/07/mikael-fridh-portrait.jpeg" + alt: Portrait of Mikael Fridh + github_url: "https://www.linkedin.com/in/mikaelfridh/" - name: Yu Cao date: February 2023 blurb: Technical contributions diff --git a/linkerd.io/content/meetup-in-a-box/_index.md b/linkerd.io/content/meetup-in-a-box/_index.md index 04382fe505..ea8f7d5579 100644 --- a/linkerd.io/content/meetup-in-a-box/_index.md +++ b/linkerd.io/content/meetup-in-a-box/_index.md @@ -1,5 +1,7 @@ --- layout: meetup-in-a-box +title: Meetup-in-a-Box +description: Are you passionate about Linkerd and sharing your experience with the broader community? We want to help you tell your story! 
Whether writing a blog post, creating a YouTube or Twitch video, or speaking at a conference, our team will help you deliver your Linkerd story to a broad audience. top_hero: title: 'All you need for a successful Linkerd talk' description: diff --git a/linkerd.io/static/images/adopters/db-schenker.png b/linkerd.io/static/images/adopters/db-schenker.png new file mode 100644 index 0000000000..e8bbbd9c2e Binary files /dev/null and b/linkerd.io/static/images/adopters/db-schenker.png differ diff --git a/linkerd.io/static/uploads/2016/04/routing-diagram.png b/linkerd.io/static/uploads/2016/04/routing-diagram.png deleted file mode 100644 index ba4e547009..0000000000 Binary files a/linkerd.io/static/uploads/2016/04/routing-diagram.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/01/add-steps-dashboard-1024x701.png b/linkerd.io/static/uploads/2017/01/add-steps-dashboard-1024x701.png deleted file mode 100644 index 2316770749..0000000000 Binary files a/linkerd.io/static/uploads/2017/01/add-steps-dashboard-1024x701.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/01/add-steps-diagram.png b/linkerd.io/static/uploads/2017/01/add-steps-diagram.png deleted file mode 100644 index 26f5ef5d64..0000000000 Binary files a/linkerd.io/static/uploads/2017/01/add-steps-diagram.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/04/linkerd-service-mesh-diagram-1024x587.png b/linkerd.io/static/uploads/2017/04/linkerd-service-mesh-diagram-1024x587.png deleted file mode 100644 index 8b8b4edfe6..0000000000 Binary files a/linkerd.io/static/uploads/2017/04/linkerd-service-mesh-diagram-1024x587.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-1_everyone.png b/linkerd.io/static/uploads/2017/07/buoyant-1_everyone.png deleted file mode 100644 index 9a508ac786..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-1_everyone.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-2_personal.png b/linkerd.io/static/uploads/2017/07/buoyant-2_personal.png deleted file mode 100644 index b2188fdfb1..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-2_personal.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-3_request_path.png b/linkerd.io/static/uploads/2017/07/buoyant-3_request_path.png deleted file mode 100644 index 5648dbdab4..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-3_request_path.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-4_override.png b/linkerd.io/static/uploads/2017/07/buoyant-4_override.png deleted file mode 100644 index a4073f39e5..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-4_override.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-dashboard-1024x689.png b/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-dashboard-1024x689.png deleted file mode 100644 index 5369b0629e..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-dashboard-1024x689.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-none-1024x299.png b/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-none-1024x299.png deleted file mode 100644 index d359f038b4..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-none-1024x299.png and /dev/null differ diff --git 
a/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-success-rate-1024x299.png b/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-success-rate-1024x299.png deleted file mode 100644 index 12d8f7417f..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-success-rate-1024x299.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-success-rates-1024x548.png b/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-success-rates-1024x548.png deleted file mode 100644 index 11a9b0307b..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-circuit-breaking-success-rates-1024x548.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-grpc-daemonset-1024x617.png b/linkerd.io/static/uploads/2017/07/buoyant-grpc-daemonset-1024x617.png deleted file mode 100644 index 22425d7e15..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-grpc-daemonset-1024x617.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-daemonset-mesh.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-daemonset-mesh.png deleted file mode 100644 index f6688a0efa..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-daemonset-mesh.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-egress-dtab.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-egress-dtab.png deleted file mode 100644 index e5655a0b56..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-egress-dtab.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-hello-world-ingress-controller-1.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-hello-world-ingress-controller-1.png deleted file mode 100644 index 3fa2e19220..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-hello-world-ingress-controller-1.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-hello-world-timeouts.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-hello-world-timeouts.png deleted file mode 100644 index 45235a15c7..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-hello-world-timeouts.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-linkerd-admin-large-1024x737.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-linkerd-admin-large-1024x737.png deleted file mode 100644 index 1f26197a4b..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-linkerd-admin-large-1024x737.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-linkerd-viz-large-1024x739.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-linkerd-viz-large-1024x739.png deleted file mode 100644 index fc98b87235..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-linkerd-viz-large-1024x739.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-diagram.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-diagram.png deleted file mode 100644 index d805e7f144..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-diagram.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-search-1-large-1024x352.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-search-1-large-1024x352.png deleted file mode 100644 index 0f203a1920..0000000000 Binary files 
a/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-search-1-large-1024x352.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-span-1-large-1024x712.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-span-1-large-1024x712.png deleted file mode 100644 index bb2cd1f039..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-span-1-large-1024x712.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-trace-1-large-1024x360.png b/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-trace-1-large-1024x360.png deleted file mode 100644 index ac49e0ee9b..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-k8s-tracing-trace-1-large-1024x360.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-l2l-diagram.png b/linkerd.io/static/uploads/2017/07/buoyant-l2l-diagram.png deleted file mode 100644 index ff93c6849c..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-l2l-diagram.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-dashboard.png b/linkerd.io/static/uploads/2017/07/buoyant-linkerd-dashboard.png deleted file mode 100644 index ea3b93f6a8..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-dashboard.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-github-star-history.png b/linkerd.io/static/uploads/2017/07/buoyant-linkerd-github-star-history.png deleted file mode 100644 index f8ef039e4c..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-github-star-history.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-memory-footprint-chart.png b/linkerd.io/static/uploads/2017/07/buoyant-linkerd-memory-footprint-chart.png deleted file mode 100644 index e727edcc39..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-memory-footprint-chart.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-tcp-dashboard-1-large-1024x692.png b/linkerd.io/static/uploads/2017/07/buoyant-linkerd-tcp-dashboard-1-large-1024x692.png deleted file mode 100644 index f7dd8e99e9..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-tcp-dashboard-1-large-1024x692.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-viz-dcos-load.png b/linkerd.io/static/uploads/2017/07/buoyant-linkerd-viz-dcos-load.png deleted file mode 100644 index 827426cb38..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-viz-dcos-load.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-viz-dcos.png b/linkerd.io/static/uploads/2017/07/buoyant-linkerd-viz-dcos.png deleted file mode 100644 index 5b9353846b..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-linkerd-viz-dcos.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-namerd.png b/linkerd.io/static/uploads/2017/07/buoyant-namerd.png deleted file mode 100644 index b94fa5b89f..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-namerd.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-admin-large-1024x737.png b/linkerd.io/static/uploads/2017/07/buoyant-pipeline-admin-large-1024x737.png deleted file mode 100644 index dae2134711..0000000000 Binary files 
a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-admin-large-1024x737.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-build-parameters.png b/linkerd.io/static/uploads/2017/07/buoyant-pipeline-build-parameters.png deleted file mode 100644 index 0fca34de71..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-build-parameters.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-cleanup.png b/linkerd.io/static/uploads/2017/07/buoyant-pipeline-cleanup.png deleted file mode 100644 index 86376d4311..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-cleanup.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-integration-testing.png b/linkerd.io/static/uploads/2017/07/buoyant-pipeline-integration-testing.png deleted file mode 100644 index eac02e2ec6..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-integration-testing.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-shift-traffic-10.png b/linkerd.io/static/uploads/2017/07/buoyant-pipeline-shift-traffic-10.png deleted file mode 100644 index 5eb19e5397..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-shift-traffic-10.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-shift-traffic-100.png b/linkerd.io/static/uploads/2017/07/buoyant-pipeline-shift-traffic-100.png deleted file mode 100644 index 1aa439a047..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-pipeline-shift-traffic-100.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-sidecar.png b/linkerd.io/static/uploads/2017/07/buoyant-sidecar.png deleted file mode 100644 index ab494b62d3..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-sidecar.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-staging-users-v2.png b/linkerd.io/static/uploads/2017/07/buoyant-staging-users-v2.png deleted file mode 100644 index d946946414..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-staging-users-v2.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-zipkin-detail.png b/linkerd.io/static/uploads/2017/07/buoyant-zipkin-detail.png deleted file mode 100644 index cefffb8d9e..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-zipkin-detail.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-zipkin-trace-overview.png b/linkerd.io/static/uploads/2017/07/buoyant-zipkin-trace-overview.png deleted file mode 100644 index 6a0e17236c..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-zipkin-trace-overview.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/buoyant-zipkin-trace.png b/linkerd.io/static/uploads/2017/07/buoyant-zipkin-trace.png deleted file mode 100644 index deef0d041f..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/buoyant-zipkin-trace.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/consecutive-failures.png b/linkerd.io/static/uploads/2017/07/consecutive-failures.png deleted file mode 100644 index 4c32704af4..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/consecutive-failures.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/07/traffic-shifting.png b/linkerd.io/static/uploads/2017/07/traffic-shifting.png deleted file mode 
100644 index 6cdedbb625..0000000000 Binary files a/linkerd.io/static/uploads/2017/07/traffic-shifting.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2017/12/Screen-Shot-2017-12-21-at-9.43.47-AM.png b/linkerd.io/static/uploads/2017/12/Screen-Shot-2017-12-21-at-9.43.47-AM.png deleted file mode 100644 index 0779b3de2a..0000000000 Binary files a/linkerd.io/static/uploads/2017/12/Screen-Shot-2017-12-21-at-9.43.47-AM.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/03/Screen-Shot-2018-03-20-at-3.43.11-PM-300x168.png b/linkerd.io/static/uploads/2018/03/Screen-Shot-2018-03-20-at-3.43.11-PM-300x168.png deleted file mode 100644 index 28426353a6..0000000000 Binary files a/linkerd.io/static/uploads/2018/03/Screen-Shot-2018-03-20-at-3.43.11-PM-300x168.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/blog_rbac_configuration.png b/linkerd.io/static/uploads/2018/05/blog_rbac_configuration.png deleted file mode 100644 index 7cf6469f67..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/blog_rbac_configuration.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/collage-birthday.jpg b/linkerd.io/static/uploads/2018/05/collage-birthday.jpg deleted file mode 100644 index ccb8d0dfa8..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/collage-birthday.jpg and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/conduit-grafana-1-1024x556-1024x556.png b/linkerd.io/static/uploads/2018/05/conduit-grafana-1-1024x556-1024x556.png deleted file mode 100644 index f0aba5995d..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/conduit-grafana-1-1024x556-1024x556.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/conduit-prom-1-1024x656-1024x656.png b/linkerd.io/static/uploads/2018/05/conduit-prom-1-1024x656-1024x656.png deleted file mode 100644 index 74ba384ce0..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/conduit-prom-1-1024x656-1024x656.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/conduit-prom-2-1024x509-1024x509.png b/linkerd.io/static/uploads/2018/05/conduit-prom-2-1024x509-1024x509.png deleted file mode 100644 index c3c79042f0..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/conduit-prom-2-1024x509-1024x509.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/ecs-linkerd-viz.png b/linkerd.io/static/uploads/2018/05/ecs-linkerd-viz.png deleted file mode 100644 index 4c7b89aca3..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/ecs-linkerd-viz.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/ecs-tasks-1024x589.png b/linkerd.io/static/uploads/2018/05/ecs-tasks-1024x589.png deleted file mode 100644 index cc0a5d43f5..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/ecs-tasks-1024x589.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/happy-birthday-linkerd-1024x550.jpg b/linkerd.io/static/uploads/2018/05/happy-birthday-linkerd-1024x550.jpg deleted file mode 100644 index 7d5b1b2e5c..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/happy-birthday-linkerd-1024x550.jpg and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/per_request_routing@2x.png b/linkerd.io/static/uploads/2018/05/per_request_routing@2x.png deleted file mode 100644 index 48dd2dfac4..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/per_request_routing@2x.png and /dev/null differ diff --git 
a/linkerd.io/static/uploads/2018/05/service-mesh-for-ECS@2x.png b/linkerd.io/static/uploads/2018/05/service-mesh-for-ECS@2x.png deleted file mode 100644 index accd3925b5..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/service-mesh-for-ECS@2x.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/05/usage.png b/linkerd.io/static/uploads/2018/05/usage.png deleted file mode 100644 index d81e5f2229..0000000000 Binary files a/linkerd.io/static/uploads/2018/05/usage.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2018/08/Pasted-image-at-2018_04_20-09_28-AM-1024x930.png b/linkerd.io/static/uploads/2018/08/Pasted-image-at-2018_04_20-09_28-AM-1024x930.png deleted file mode 100644 index addf11e81b..0000000000 Binary files a/linkerd.io/static/uploads/2018/08/Pasted-image-at-2018_04_20-09_28-AM-1024x930.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2019/03/request_tree@2x.png b/linkerd.io/static/uploads/2019/03/request_tree@2x.png deleted file mode 100644 index a4cb3df56b..0000000000 Binary files a/linkerd.io/static/uploads/2019/03/request_tree@2x.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2019/03/response_trace@2x.png b/linkerd.io/static/uploads/2019/03/response_trace@2x.png deleted file mode 100644 index 3e13d5c8e7..0000000000 Binary files a/linkerd.io/static/uploads/2019/03/response_trace@2x.png and /dev/null differ diff --git a/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-orig.jpg b/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-orig.jpg new file mode 100644 index 0000000000..fb61a2859e Binary files /dev/null and b/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-orig.jpg differ diff --git a/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-rect.jpg b/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-rect.jpg new file mode 100644 index 0000000000..c6906bf548 Binary files /dev/null and b/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-rect.jpg differ diff --git a/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-square.jpg b/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-square.jpg new file mode 100644 index 0000000000..938e2f5803 Binary files /dev/null and b/linkerd.io/static/uploads/2023/06/dnevozhai-routing-7nrsVjvALnA-unsplash-square.jpg differ diff --git a/linkerd.io/static/uploads/2023/06/roundup-clocks-rect.png b/linkerd.io/static/uploads/2023/06/roundup-clocks-rect.png new file mode 100644 index 0000000000..9ccc1c9685 Binary files /dev/null and b/linkerd.io/static/uploads/2023/06/roundup-clocks-rect.png differ diff --git a/linkerd.io/static/uploads/2023/06/roundup-clocks-square.png b/linkerd.io/static/uploads/2023/06/roundup-clocks-square.png new file mode 100644 index 0000000000..e50b77920d Binary files /dev/null and b/linkerd.io/static/uploads/2023/06/roundup-clocks-square.png differ diff --git a/linkerd.io/static/uploads/2023/07/flat_network@2x.png b/linkerd.io/static/uploads/2023/07/flat_network@2x.png new file mode 100644 index 0000000000..202c28187c Binary files /dev/null and b/linkerd.io/static/uploads/2023/07/flat_network@2x.png differ diff --git a/linkerd.io/static/uploads/2023/07/jan-huber-0xNbk7D_s6U-rect.jpg b/linkerd.io/static/uploads/2023/07/jan-huber-0xNbk7D_s6U-rect.jpg new file mode 100644 index 0000000000..030fb9011c Binary files /dev/null and 
b/linkerd.io/static/uploads/2023/07/jan-huber-0xNbk7D_s6U-rect.jpg differ diff --git a/linkerd.io/static/uploads/2023/07/jan-huber-0xNbk7D_s6U-square.jpg b/linkerd.io/static/uploads/2023/07/jan-huber-0xNbk7D_s6U-square.jpg new file mode 100644 index 0000000000..b1ff5cd63f Binary files /dev/null and b/linkerd.io/static/uploads/2023/07/jan-huber-0xNbk7D_s6U-square.jpg differ diff --git a/linkerd.io/static/uploads/2023/07/mikael-fridh-featured.png b/linkerd.io/static/uploads/2023/07/mikael-fridh-featured.png new file mode 100644 index 0000000000..927b297694 Binary files /dev/null and b/linkerd.io/static/uploads/2023/07/mikael-fridh-featured.png differ diff --git a/linkerd.io/static/uploads/2023/07/mikael-fridh-hero-square.png b/linkerd.io/static/uploads/2023/07/mikael-fridh-hero-square.png new file mode 100644 index 0000000000..ce757785db Binary files /dev/null and b/linkerd.io/static/uploads/2023/07/mikael-fridh-hero-square.png differ diff --git a/linkerd.io/static/uploads/2023/07/mikael-fridh-hero.png b/linkerd.io/static/uploads/2023/07/mikael-fridh-hero.png new file mode 100644 index 0000000000..79ace4864b Binary files /dev/null and b/linkerd.io/static/uploads/2023/07/mikael-fridh-hero.png differ diff --git a/linkerd.io/static/uploads/2023/07/mikael-fridh-portrait.jpeg b/linkerd.io/static/uploads/2023/07/mikael-fridh-portrait.jpeg new file mode 100644 index 0000000000..95b39357b3 Binary files /dev/null and b/linkerd.io/static/uploads/2023/07/mikael-fridh-portrait.jpeg differ diff --git a/linkerd.io/static/uploads/2023/07/nasa-_SFJhRPzJHs-unsplash.jpg b/linkerd.io/static/uploads/2023/07/nasa-_SFJhRPzJHs-unsplash.jpg new file mode 100644 index 0000000000..973ae957db Binary files /dev/null and b/linkerd.io/static/uploads/2023/07/nasa-_SFJhRPzJHs-unsplash.jpg differ diff --git a/linkerd.io/static/uploads/DiagnosticTracing_Linkerd.png b/linkerd.io/static/uploads/DiagnosticTracing_Linkerd.png deleted file mode 100644 index 482787cc88..0000000000 Binary files a/linkerd.io/static/uploads/DiagnosticTracing_Linkerd.png and /dev/null differ diff --git a/linkerd.io/static/uploads/WePay-logo.png b/linkerd.io/static/uploads/WePay-logo.png deleted file mode 100644 index 2443cf4e10..0000000000 Binary files a/linkerd.io/static/uploads/WePay-logo.png and /dev/null differ diff --git a/linkerd.io/static/uploads/a6d4b0bd-conduit.jpg b/linkerd.io/static/uploads/a6d4b0bd-conduit.jpg deleted file mode 100644 index 1f085c732b..0000000000 Binary files a/linkerd.io/static/uploads/a6d4b0bd-conduit.jpg and /dev/null differ diff --git a/linkerd.io/static/uploads/conduit_community_recap.png b/linkerd.io/static/uploads/conduit_community_recap.png deleted file mode 100644 index 97ca15375d..0000000000 Binary files a/linkerd.io/static/uploads/conduit_community_recap.png and /dev/null differ diff --git a/linkerd.io/static/uploads/conduit_introducing_conduit_featured.png b/linkerd.io/static/uploads/conduit_introducing_conduit_featured.png deleted file mode 100644 index b58c53f8fe..0000000000 Binary files a/linkerd.io/static/uploads/conduit_introducing_conduit_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/conduit_webinar_recap.png b/linkerd.io/static/uploads/conduit_webinar_recap.png deleted file mode 100644 index 073dd4cf85..0000000000 Binary files a/linkerd.io/static/uploads/conduit_webinar_recap.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes10_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes10_featured_Twitter_ratio.png deleted file mode 100644 index 
38e604c468..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes10_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes11_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes11_featured_Twitter_ratio.png deleted file mode 100644 index e34282e6ed..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes11_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes1_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes1_featured_Twitter_ratio.png deleted file mode 100644 index fb409a4944..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes1_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes2_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes2_featured_Twitter_ratio.png deleted file mode 100644 index e98d15f1a5..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes2_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes3_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes3_featured_Twitter_ratio.png deleted file mode 100644 index 51a6ef014a..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes3_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes4_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes4_featured_Twitter_ratio.png deleted file mode 100644 index 1dc0d9b69d..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes4_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes5_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes5_featured_Twitter_ratio.png deleted file mode 100644 index 232330e4aa..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes5_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes6_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes6_featured_Twitter_ratio.png deleted file mode 100644 index a0469619e6..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes6_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes7_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes7_featured_Twitter_ratio.png deleted file mode 100644 index 21a3fcb662..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes7_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes8_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes8_featured_Twitter_ratio.png deleted file mode 100644 index 58fd74d794..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes8_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/kubernetes9_featured_Twitter_ratio.png b/linkerd.io/static/uploads/kubernetes9_featured_Twitter_ratio.png deleted file mode 100644 index cb9e2ebec5..0000000000 Binary files a/linkerd.io/static/uploads/kubernetes9_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd-graalvm-working-group.png b/linkerd.io/static/uploads/linkerd-graalvm-working-group.png deleted file mode 100644 index ad556e9c5d..0000000000 Binary files a/linkerd.io/static/uploads/linkerd-graalvm-working-group.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_GRPC_featured.png b/linkerd.io/static/uploads/linkerd_GRPC_featured.png deleted file mode 
100644 index e15e952b6f..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_GRPC_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_circuit_breaking_featured.png b/linkerd.io/static/uploads/linkerd_circuit_breaking_featured.png deleted file mode 100644 index e9ae574cf4..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_circuit_breaking_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_faster_featured.png b/linkerd.io/static/uploads/linkerd_faster_featured.png deleted file mode 100644 index 109b3da92e..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_faster_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_featured.png b/linkerd.io/static/uploads/linkerd_featured.png deleted file mode 100644 index d5595ff04d..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_featured_DCOS.png b/linkerd.io/static/uploads/linkerd_featured_DCOS.png deleted file mode 100644 index f9391469fe..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_featured_DCOS.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_featured_EASY.png b/linkerd.io/static/uploads/linkerd_featured_EASY.png deleted file mode 100644 index 28ad0aa67c..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_featured_EASY.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_featured_PLAYING_WELL.png b/linkerd.io/static/uploads/linkerd_featured_PLAYING_WELL.png deleted file mode 100644 index ea5f64ac11..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_featured_PLAYING_WELL.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_featured_operability.png b/linkerd.io/static/uploads/linkerd_featured_operability.png deleted file mode 100644 index 069c0a0cec..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_featured_operability.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_featured_ployglot-1.png b/linkerd.io/static/uploads/linkerd_featured_ployglot-1.png deleted file mode 100644 index 130a14e799..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_featured_ployglot-1.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_featured_sidecars.png b/linkerd.io/static/uploads/linkerd_featured_sidecars.png deleted file mode 100644 index 1d9cc4e9ac..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_featured_sidecars.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_featured_transparent.png b/linkerd.io/static/uploads/linkerd_featured_transparent.png deleted file mode 100644 index c0e7bce703..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_featured_transparent.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_hundred_billion_featured.png b/linkerd.io/static/uploads/linkerd_hundred_billion_featured.png deleted file mode 100644 index 2db3e382f8..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_hundred_billion_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_tcp_featured.png b/linkerd.io/static/uploads/linkerd_tcp_featured.png deleted file mode 100644 index 5bbc9f78e1..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_tcp_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_v_060.png b/linkerd.io/static/uploads/linkerd_v_060.png deleted file mode 100644 index 
c4026eb170..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_v_060.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_version_009_featured.png b/linkerd.io/static/uploads/linkerd_version_009_featured.png deleted file mode 100644 index 74b22fc2cd..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_version_009_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_version_084_featured.png b/linkerd.io/static/uploads/linkerd_version_084_featured.png deleted file mode 100644 index 8323d04935..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_version_084_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_version_12_featured.png b/linkerd.io/static/uploads/linkerd_version_12_featured.png deleted file mode 100644 index fff04fee88..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_version_12_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_version_133_featured.png b/linkerd.io/static/uploads/linkerd_version_133_featured.png deleted file mode 100644 index 3874c62712..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_version_133_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/linkerd_version_1_featured.png b/linkerd.io/static/uploads/linkerd_version_1_featured.png deleted file mode 100644 index a89088aa5f..0000000000 Binary files a/linkerd.io/static/uploads/linkerd_version_1_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/logos/blue/docker.png b/linkerd.io/static/uploads/logos/blue/docker.png new file mode 100644 index 0000000000..d97d64760d Binary files /dev/null and b/linkerd.io/static/uploads/logos/blue/docker.png differ diff --git a/linkerd.io/static/uploads/navigating_the_ecosystem.png b/linkerd.io/static/uploads/navigating_the_ecosystem.png deleted file mode 100644 index 39e88ab033..0000000000 Binary files a/linkerd.io/static/uploads/navigating_the_ecosystem.png and /dev/null differ diff --git a/linkerd.io/static/uploads/prometheus-the-right-way.png b/linkerd.io/static/uploads/prometheus-the-right-way.png deleted file mode 100644 index 21ae510704..0000000000 Binary files a/linkerd.io/static/uploads/prometheus-the-right-way.png and /dev/null differ diff --git a/linkerd.io/static/uploads/service_mesh_featured.png b/linkerd.io/static/uploads/service_mesh_featured.png deleted file mode 100644 index 6178adb807..0000000000 Binary files a/linkerd.io/static/uploads/service_mesh_featured.png and /dev/null differ diff --git a/linkerd.io/static/uploads/shadows1_featured_Twitter_ratio.png b/linkerd.io/static/uploads/shadows1_featured_Twitter_ratio.png deleted file mode 100644 index 6b4b8a0db2..0000000000 Binary files a/linkerd.io/static/uploads/shadows1_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/shadows2_featured_Twitter_ratio.png b/linkerd.io/static/uploads/shadows2_featured_Twitter_ratio.png deleted file mode 100644 index d559f06f7d..0000000000 Binary files a/linkerd.io/static/uploads/shadows2_featured_Twitter_ratio.png and /dev/null differ diff --git a/linkerd.io/static/uploads/version_conduit_011.png b/linkerd.io/static/uploads/version_conduit_011.png deleted file mode 100644 index 5de865c6b2..0000000000 Binary files a/linkerd.io/static/uploads/version_conduit_011.png and /dev/null differ diff --git a/linkerd.io/static/uploads/version_conduit_020.png b/linkerd.io/static/uploads/version_conduit_020.png deleted file mode 100644 index 
9a62853f91..0000000000 Binary files a/linkerd.io/static/uploads/version_conduit_020.png and /dev/null differ diff --git a/linkerd.io/static/uploads/version_conduit_030.png b/linkerd.io/static/uploads/version_conduit_030.png deleted file mode 100644 index 7c82e49f14..0000000000 Binary files a/linkerd.io/static/uploads/version_conduit_030.png and /dev/null differ diff --git a/linkerd.io/static/uploads/version_conduit_040.png b/linkerd.io/static/uploads/version_conduit_040.png deleted file mode 100644 index 5ca4fcd973..0000000000 Binary files a/linkerd.io/static/uploads/version_conduit_040.png and /dev/null differ diff --git a/linkerd.io/static/uploads/wepay_image_0.png b/linkerd.io/static/uploads/wepay_image_0.png deleted file mode 100644 index b0f897ca97..0000000000 Binary files a/linkerd.io/static/uploads/wepay_image_0.png and /dev/null differ diff --git a/linkerd.io/static/uploads/wepay_image_1.png b/linkerd.io/static/uploads/wepay_image_1.png deleted file mode 100644 index 77b9659fca..0000000000 Binary files a/linkerd.io/static/uploads/wepay_image_1.png and /dev/null differ