Skip to content

Commit

Permalink
Support workload identity in runs (#139)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnstairs authored Sep 9, 2024
1 parent b655585 commit f6d6a4c
Show file tree
Hide file tree
Showing 21 changed files with 364 additions and 80 deletions.
6 changes: 6 additions & 0 deletions .devcontainer/install-tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,9 @@ wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-
apt-get update
DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends \
postgresql-client-16

# install az-pim
# The release tag and the asset file name both embed the version, so both are
# derived from AZ_PIM_VERSION; upgrading is then a one-line change.
AZ_PIM_VERSION=0.5.0
wget "https://github.com/demoray/azure-pim-cli/releases/download/${AZ_PIM_VERSION}/az-pim-linux-musl-${AZ_PIM_VERSION}" \
    && sudo mv "az-pim-linux-musl-${AZ_PIM_VERSION}" /usr/bin/az-pim \
    && sudo chmod +x /usr/bin/az-pim
8 changes: 8 additions & 0 deletions Makefile.cloud
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ AUTO_MIGRATE=false
DOCKER_BUILD_ARCH_FLAGS = --arch amd64
DOCKER_BUILD_PUSH_FLAGS = --push --push-force

# pim-elevate: activate the Owner role on the environment's subscription via az-pim.
# Prompts for a justification (falling back to "Deploying Tyger" when the reply is
# empty), reads the subscription ID from `.cloud.subscriptionId` in
# ENVIRONMENT_CONFIG_JSON using jq, and passes both to `az-pim activate role`.
# NOTE(review): the recipe reuses shell variables across lines, so it presumably
# relies on .ONESHELL (or equivalent) being set elsewhere — confirm.
pim-elevate:
default_justification="Deploying Tyger"
read -p "Enter a justification (default: \"$${default_justification}\"): " justification
justification=$${justification:-$${default_justification}}

subscription=$$(echo '${ENVIRONMENT_CONFIG_JSON}' | jq -r '.cloud.subscriptionId')
az-pim activate role --subscription "$${subscription}" Owner "$${justification}"

ensure-environment: check-az-login install-cli
tyger cloud install -f <(scripts/get-config.sh)

Expand Down
80 changes: 79 additions & 1 deletion cli/integrationtest/controlplane_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/microsoft/tyger/cli/internal/controlplane/model"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/api/resource"
"sigs.k8s.io/yaml"
Expand All @@ -39,6 +40,7 @@ import (
const (
BasicImage = "mcr.microsoft.com/cbl-mariner/base/core:2.0"
GpuImage = "nvidia/cuda:11.0.3-base-ubuntu20.04"
AzCliImage = "mcr.microsoft.com/azure-cli:2.64.0"
)

func init() {
Expand Down Expand Up @@ -853,7 +855,7 @@ func TestOpenApiSpecIsAsExpected(t *testing.T) {

curlCommand = fmt.Sprintf("curl --unix %s %s ", strings.Split(client.ControlPlaneUrl.Path, ":")[0], u.String())
} else {
curlCommand = fmt.Sprintf("curl %s ", client.ControlPlaneUrl)
curlCommand = fmt.Sprintf("curl %s ", swaggerUri)
}

t.Errorf("Result not as expected. To update, run `%s > %s`\n\nDiff:%v",
Expand Down Expand Up @@ -1571,6 +1573,82 @@ timeoutSeconds: 600`
runTygerSucceeds(t, "run", "exec", "--file", runSpecPath, "--pull")
}

// TestWorkloadIdentity submits a job run whose codespec sets
// `identity: test-identity` and uses the Azure CLI image. The container script
// logs in with the federated token file and then requests an access token; the
// run is executed through runTygerSucceeds.
// NOTE(review): skipIfUsingUnixSocket presumably skips the test for local
// (non-cloud) environments, where workload identity is not supported — confirm.
func TestWorkloadIdentity(t *testing.T) {
t.Parallel()
skipIfUsingUnixSocket(t)

require := require.New(t)

// Run spec: the only %s placeholder is the container image.
runSpec := fmt.Sprintf(`
job:
codespec:
image: %s
identity: test-identity
command:
- "sh"
- "-c"
- |
set -euo pipefail
az login --federated-token "$(cat $AZURE_FEDERATED_TOKEN_FILE)" --service-principal -u $AZURE_CLIENT_ID -t $AZURE_TENANT_ID --allow-no-subscriptions
az account get-access-token > /dev/null
timeoutSeconds: 600`, AzCliImage)

// Write the spec to a per-test temp directory so it can be passed via --file.
tempDir := t.TempDir()
runSpecPath := filepath.Join(tempDir, "runspec.yaml")
require.NoError(os.WriteFile(runSpecPath, []byte(runSpec), 0644))

runTygerSucceeds(t, "run", "exec", "--file", runSpecPath, "--logs")
}

// TestMissingWorkloadIdentity submits the same Azure CLI login script as
// TestWorkloadIdentity but with no `identity` field in the codespec, and
// asserts that `tyger run exec` returns an error.
// NOTE(review): the script runs under `set -u` and reads
// AZURE_FEDERATED_TOKEN_FILE, AZURE_CLIENT_ID and AZURE_TENANT_ID; presumably
// these are not injected when no identity is requested, which is what makes the
// run fail — confirm.
func TestMissingWorkloadIdentity(t *testing.T) {
t.Parallel()
skipIfUsingUnixSocket(t)

require := require.New(t)

// Run spec without an identity: the only %s placeholder is the container image.
runSpec := fmt.Sprintf(`
job:
codespec:
image: %s
command:
- "sh"
- "-c"
- |
set -euo pipefail
az login --federated-token "$(cat $AZURE_FEDERATED_TOKEN_FILE)" --service-principal -u $AZURE_CLIENT_ID -t $AZURE_TENANT_ID --allow-no-subscriptions
az account get-access-token > /dev/null
timeoutSeconds: 600`, AzCliImage)

// Write the spec to a per-test temp directory so it can be passed via --file.
tempDir := t.TempDir()
runSpecPath := filepath.Join(tempDir, "runspec.yaml")
require.NoError(os.WriteFile(runSpecPath, []byte(runSpec), 0644))

// The run is expected to fail; stdout and stderr are not inspected.
_, _, err := runTyger("run", "exec", "--file", runSpecPath, "--logs")
assert.Error(t, err)
}

// TestWorkloadIdentityWithInvalidIdentity submits a trivial job (`date` on the
// basic image) whose codespec names the identity "invalid-identity", and
// asserts that `tyger run exec` returns an error.
// NOTE(review): presumably "invalid-identity" is not among the identities
// configured for the test environment, so it is rejected — confirm.
func TestWorkloadIdentityWithInvalidIdentity(t *testing.T) {
t.Parallel()
skipIfUsingUnixSocket(t)

require := require.New(t)

// Run spec: the only %s placeholder is the container image.
runSpec := fmt.Sprintf(`
job:
codespec:
image: %s
identity: invalid-identity
command: date
timeoutSeconds: 600`, BasicImage)

// Write the spec to a per-test temp directory so it can be passed via --file.
tempDir := t.TempDir()
runSpecPath := filepath.Join(tempDir, "runspec.yaml")
require.NoError(os.WriteFile(runSpecPath, []byte(runSpec), 0644))

// The run is expected to fail; stdout and stderr are not inspected.
_, _, err := runTyger("run", "exec", "--file", runSpecPath, "--logs")
assert.Error(t, err)
}

func waitForRunStarted(t *testing.T, runId string) model.Run {
t.Helper()
return waitForRun(t, runId, true, false)
Expand Down
6 changes: 5 additions & 1 deletion cli/integrationtest/expected_openapi_spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,10 @@ components:
type: string
description: Environment variables to set in the container
nullable: true
identity:
type: string
description: The workload identity to use. Only supported in cloud environments.
nullable: true
resources:
$ref: '#/components/schemas/CodespecResources'
maxReplicas:
Expand Down Expand Up @@ -749,4 +753,4 @@ components:
nullable: true
description: The name and port of the endpoints that the worker exposes.
nullable: true
additionalProperties: false
additionalProperties: false
8 changes: 7 additions & 1 deletion cli/internal/cmd/codespec.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ func newCodespecCreateCommand() *cobra.Command {
outputBuffers []string
env map[string]string
command bool
identity string
requests overcommittableResourceStrings
limits overcommittableResourceStrings
gpu string
Expand All @@ -63,7 +64,7 @@ func newCodespecCreateCommand() *cobra.Command {
}

var cmd = &cobra.Command{
Use: `create NAME [--file YAML_SPEC] [--image IMAGE] [--kind job|worker] [--max-replicas REPLICAS] [[--input BUFFER_NAME] ...] [[--output BUFFER_NAME] ...] [[--env \"KEY=VALUE\"] ...] [[ --endpoint SERVICE=PORT ]] [--gpu QUANTITY] [--cpu-request QUANTITY] [--memory-request QUANTITY] [--cpu-limit QUANTITY] [--memory-limit QUANTITY] [--command] -- [COMMAND] [args...]`,
Use: `create NAME [--file YAML_SPEC] [--image IMAGE] [--kind job|worker] [--max-replicas REPLICAS] [[--input BUFFER_NAME] ...] [[--output BUFFER_NAME] ...] [[--env \"KEY=VALUE\"] ...] [--identity IDENTITY] [[ --endpoint SERVICE=PORT ]] [--gpu QUANTITY] [--cpu-request QUANTITY] [--memory-request QUANTITY] [--cpu-limit QUANTITY] [--memory-limit QUANTITY] [--command] -- [COMMAND] [args...]`,
Short: "Create or update a codespec",
Long: `Create or update a codespec. Outputs the version of the codespec that was created.`,
DisableFlagsInUseLine: true,
Expand Down Expand Up @@ -134,6 +135,10 @@ func newCodespecCreateCommand() *cobra.Command {
newCodespec.Env = flags.env
}

if hasFlagChanged(cmd, "identity") {
newCodespec.Identity = flags.identity
}

if hasFlagChanged(cmd, "endpoint") {
newCodespec.Endpoints = flags.endpoints
}
Expand Down Expand Up @@ -276,6 +281,7 @@ func newCodespecCreateCommand() *cobra.Command {
cmd.Flags().StringToStringVarP(&flags.env, "env", "e", nil, "Environment variables to set in the container in the form KEY=value")
cmd.Flags().StringToIntVar(&flags.endpoints, "endpoint", nil, "TCP endpoints in the form NAME=PORT. Only valid for worker codespecs.")
cmd.Flags().BoolVar(&flags.command, "command", false, "If true and extra arguments are present, use them as the 'command' field in the container, rather than the 'args' field which is the default.")
cmd.Flags().StringVar(&flags.identity, "identity", "", "The workload identity to use for this codespec.")
cmd.Flags().StringVar(&flags.requests.cpu, "cpu-request", "", "CPU cores requested")
cmd.Flags().StringVar(&flags.requests.memory, "memory-request", "", "memory bytes requested")
cmd.Flags().StringVar(&flags.limits.cpu, "cpu-limit", "", "CPU cores limit")
Expand Down
1 change: 1 addition & 0 deletions cli/internal/controlplane/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ type Codespec struct {
Args []string `json:"args,omitempty"`
WorkingDir string `json:"workingDir,omitempty"`
Env map[string]string `json:"env,omitempty"`
Identity string `json:"identity,omitempty"`
Resources *CodespecResources `json:"resources,omitempty"`
MaxReplicas *int `json:"maxReplicas,omitempty"`
Endpoints map[string]int `json:"endpoints,omitempty"`
Expand Down
18 changes: 18 additions & 0 deletions cli/internal/install/cloudinstall/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions"
"github.com/fatih/color"
Expand Down Expand Up @@ -236,6 +237,17 @@ func (inst *Installer) createPromises(ctx context.Context) install.PromiseGroup
tygerServerManagedIdentityPromise := install.NewPromise(ctx, group, inst.createTygerServerManagedIdentity)
migrationRunnerManagedIdentityPromise := install.NewPromise(ctx, group, inst.createMigrationRunnerManagedIdentity)

customIdentityPromises := make([]*install.Promise[*armmsi.Identity], 0)

for _, identityName := range inst.Config.Cloud.Compute.Identities {
identityName := identityName
customIdentityPromises = append(customIdentityPromises, install.NewPromise(ctx, group, func(ctx context.Context) (*armmsi.Identity, error) {
return inst.createManagedIdentity(ctx, identityName)
}))
}

install.NewPromise(ctx, group, inst.deleteUnusedIdentities)

install.NewPromise(ctx, group, func(ctx context.Context) (any, error) {
return inst.createDatabase(ctx, tygerServerManagedIdentityPromise, migrationRunnerManagedIdentityPromise)
})
Expand All @@ -256,6 +268,12 @@ func (inst *Installer) createPromises(ctx context.Context) install.PromiseGroup
return inst.createFederatedIdentityCredential(ctx, migrationRunnerManagedIdentityPromise, createClusterPromise)
})
}

// Create a federated identity credential for every custom identity once both
// the identity and the cluster are available.
for _, identityPromise := range customIdentityPromises {
	// Shadow the loop variable so each closure captures its own promise. Before
	// Go 1.22 all iterations share one variable, so every closure could end up
	// using the last promise. This mirrors the identityName loop above.
	identityPromise := identityPromise
	install.NewPromise(ctx, group, func(ctx context.Context) (any, error) {
		return inst.createFederatedIdentityCredential(ctx, identityPromise, createClusterPromise)
	})
}
}

getAdminCredsPromise := install.NewPromiseAfter(ctx, group, inst.getAdminRESTConfig, createApiHostClusterPromise)
Expand Down
1 change: 1 addition & 0 deletions cli/internal/install/cloudinstall/cloudconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type ComputeConfig struct {
ManagementPrincipals []Principal `json:"managementPrincipals"`
LocalDevelopmentIdentityId string `json:"localDevelopmentIdentityId"` // undocumented - for local development only
PrivateContainerRegistries []string `json:"privateContainerRegistries"`
Identities []string `json:"identities"`
}

func (c *ComputeConfig) GetManagementPrincipalIds() []string {
Expand Down
6 changes: 6 additions & 0 deletions cli/internal/install/cloudinstall/config.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ cloud:
# privateContainerRegistries:
# - myprivateregistry

# An optional array of managed identities that will be created in the resource group.
# These identities are available to runs as workload identities. When updating this list,
# both `tyger cloud install` and `tyger api install` must be run.
# identities:
# - my-identity

database:
serverName: {{ .DatabaseServerName }}
postgresMajorVersion: {{ .PostgresMajorVersion }}
Expand Down
22 changes: 21 additions & 1 deletion cli/internal/install/cloudinstall/helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,10 @@ func (inst *Installer) InstallTygerHelmChart(ctx context.Context, restConfig *re
if err != nil {
return "", "", fmt.Errorf("failed to marshal cluster configuration: %w", err)
}
clustersConfig := make([]map[string]any, 0)
if err := json.Unmarshal(clustersConfigJson, &clustersConfig); err != nil {
return "", "", fmt.Errorf("failed to unmarshal cluster configuration: %w", err)
}

identitiesClient, err := armmsi.NewUserAssignedIdentitiesClient(inst.Config.Cloud.SubscriptionID, inst.Credential, nil)
if err != nil {
Expand All @@ -349,6 +353,21 @@ func (inst *Installer) InstallTygerHelmChart(ctx context.Context, restConfig *re
return "", "", fmt.Errorf("failed to get managed identity: %w", err)
}

// Look up every configured custom identity and collect its name and client ID
// for the Helm values. The slice is created non-nil so that an empty list is
// still passed as an empty list.
customIdentitiesValues := make([]map[string]any, 0, len(inst.Config.Cloud.Compute.Identities))
for _, identityName := range inst.Config.Cloud.Compute.Identities {
	// Keep the lookup result under its own name rather than shadowing the
	// loop variable, which holds the identity's name as a string.
	customIdentity, err := identitiesClient.Get(ctx, inst.Config.Cloud.ResourceGroup, identityName, nil)
	if err != nil {
		// Name the identity so this failure can be told apart from the
		// other managed identity lookups in this function.
		return "", "", fmt.Errorf("failed to get managed identity %q: %w", identityName, err)
	}

	customIdentitiesValues = append(
		customIdentitiesValues,
		map[string]any{
			"name":     customIdentity.Name,
			"clientId": customIdentity.Properties.ClientID,
		})
}

storageClient, err := armstorage.NewAccountsClient(inst.Config.Cloud.SubscriptionID, inst.Credential, nil)
if err != nil {
return "", "", fmt.Errorf("failed to create storage client: %w", err)
Expand Down Expand Up @@ -409,6 +428,7 @@ func (inst *Installer) InstallTygerHelmChart(ctx context.Context, restConfig *re
"name": migrationRunnerIdentity.Name,
"clientId": migrationRunnerIdentity.Properties.ClientID,
},
"custom": customIdentitiesValues,
},
"security": map[string]any{
"enabled": true,
Expand All @@ -432,7 +452,7 @@ func (inst *Installer) InstallTygerHelmChart(ctx context.Context, restConfig *re
"logArchive": map[string]any{
"storageAccountEndpoint": *logArchiveAccount.Properties.PrimaryEndpoints.Blob,
},
"clusterConfigurationJson": string(clustersConfigJson),
"clusterConfiguration": clustersConfig,
},
}

Expand Down
Loading

0 comments on commit f6d6a4c

Please sign in to comment.