Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: nvidia-dra-plugin Config #38

Merged
merged 1 commit into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
/set-nas-status
/nvidia-dra-controller
/nvidia-dra-plugin
.idea
[._]*.sw[a-p]
2 changes: 1 addition & 1 deletion cmd/nvidia-dra-plugin/device_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
nvdevlib: nvdevlib,
}

err = state.syncPreparedDevicesFromCRDSpec(ctx, &config.nascrd.Spec)
err = state.syncPreparedDevicesFromCRDSpec(ctx, &config.nascr.Spec)
if err != nil {
return nil, fmt.Errorf("unable to sync prepared devices from CRD: %w", err)
}
Expand Down
21 changes: 10 additions & 11 deletions cmd/nvidia-dra-plugin/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,14 @@ const (

type driver struct {
sync.Mutex
// TODO: Rename to nascr
nascrd *nascrd.NodeAllocationState
nascr *nascrd.NodeAllocationState
nasclient *nasclient.Client
state *DeviceState
}

func NewDriver(ctx context.Context, config *Config) (*driver, error) {
var d *driver
client := nasclient.New(config.nascrd, config.clientset.Nvidia.NasV1alpha1())
client := nasclient.New(config.nascr, config.clientsets.Nvidia.NasV1alpha1())
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err := client.GetOrCreate(ctx)
if err != nil {
Expand All @@ -63,7 +62,7 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
return err
}

err = client.Update(ctx, state.GetUpdatedSpec(&config.nascrd.Spec))
err = client.Update(ctx, state.GetUpdatedSpec(&config.nascr.Spec))
if err != nil {
return err
}
Expand All @@ -74,7 +73,7 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
}

d = &driver{
nascrd: config.nascrd,
nascr: config.nascr,
nasclient: client,
state: state,
}
Expand Down Expand Up @@ -137,7 +136,7 @@ func (d *driver) IsPrepared(ctx context.Context, claimUID string) (bool, []strin
if err != nil {
return false, nil, err
}
if _, exists := d.nascrd.Spec.PreparedClaims[claimUID]; exists {
if _, exists := d.nascr.Spec.PreparedClaims[claimUID]; exists {
return true, d.state.cdi.GetClaimDevices(claimUID), nil
}
return false, nil, nil
Expand All @@ -152,12 +151,12 @@ func (d *driver) Prepare(ctx context.Context, claimUID string) ([]string, error)
return err
}

prepared, err = d.state.Prepare(ctx, claimUID, d.nascrd.Spec.AllocatedClaims[claimUID])
prepared, err = d.state.Prepare(ctx, claimUID, d.nascr.Spec.AllocatedClaims[claimUID])
if err != nil {
return err
}

err = d.nasclient.Update(ctx, d.state.GetUpdatedSpec(&d.nascrd.Spec))
err = d.nasclient.Update(ctx, d.state.GetUpdatedSpec(&d.nascr.Spec))
if err != nil {
return err
}
Expand All @@ -182,7 +181,7 @@ func (d *driver) Unprepare(ctx context.Context, claimUID string) error {
return err
}

err = d.nasclient.Update(ctx, d.state.GetUpdatedSpec(&d.nascrd.Spec))
err = d.nasclient.Update(ctx, d.state.GetUpdatedSpec(&d.nascr.Spec))
if err != nil {
return err
}
Expand Down Expand Up @@ -212,7 +211,7 @@ func (d *driver) CleanupStaleStateContinuously(ctx context.Context) {

func (d *driver) cleanupStaleStateOnce(ctx context.Context) (string, error) {
listOptions := metav1.ListOptions{
FieldSelector: fmt.Sprintf("metadata.name=%s", d.nascrd.Name),
FieldSelector: fmt.Sprintf("metadata.name=%s", d.nascr.Name),
}

list, err := d.nasclient.List(ctx, listOptions)
Expand All @@ -237,7 +236,7 @@ func (d *driver) cleanupStaleStateContinuously(ctx context.Context, resourceVers
watchOptions := metav1.ListOptions{
Watch: true,
ResourceVersion: resourceVersion,
FieldSelector: fmt.Sprintf("metadata.name=%s", d.nascrd.Name),
FieldSelector: fmt.Sprintf("metadata.name=%s", d.nascr.Name),
}

if previousError != nil {
Expand Down
14 changes: 6 additions & 8 deletions cmd/nvidia-dra-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,9 @@ type Flags struct {
}

type Config struct {
flags *Flags
// TODO: Rename to nascr
nascrd *nascrd.NodeAllocationState
// TODO: Rename to clientsets
clientset flags.ClientSets
flags *Flags
nascr *nascrd.NodeAllocationState
clientsets flags.ClientSets
}

func main() {
Expand Down Expand Up @@ -132,9 +130,9 @@ func newApp() *cli.App {
}

config := &Config{
flags: flags,
nascrd: nascr,
clientset: clientSets,
flags: flags,
nascr: nascr,
clientsets: clientSets,
}

return StartPlugin(ctx, config)
Expand Down
16 changes: 8 additions & 8 deletions cmd/nvidia-dra-plugin/sharing.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ func NewMpsManager(config *Config, deviceLib *deviceLib, controlFilesRoot, hostD

func (m *MpsManager) NewMpsControlDaemon(claim *nascrd.ClaimInfo, devices *PreparedDevices, config *nascrd.MpsConfig) *MpsControlDaemon {
return &MpsControlDaemon{
nodeName: m.config.nascrd.Name,
namespace: m.config.nascrd.Namespace,
nodeName: m.config.nascr.Name,
namespace: m.config.nascr.Namespace,
name: fmt.Sprintf(MpsControlDaemonNameFmt, claim.UID),
claim: claim,
rootDir: fmt.Sprintf("%s/%s", m.controlFilesRoot, claim.UID),
Expand All @@ -147,7 +147,7 @@ func (m *MpsManager) NewMpsControlDaemon(claim *nascrd.ClaimInfo, devices *Prepa

func (m *MpsManager) IsControlDaemonStarted(ctx context.Context, claim *nascrd.ClaimInfo) (bool, error) {
name := fmt.Sprintf(MpsControlDaemonNameFmt, claim.UID)
_, err := m.config.clientset.Core.AppsV1().Deployments(m.config.nascrd.Namespace).Get(ctx, name, metav1.GetOptions{})
_, err := m.config.clientsets.Core.AppsV1().Deployments(m.config.nascr.Namespace).Get(ctx, name, metav1.GetOptions{})
if errors.IsNotFound(err) {
return false, nil
}
Expand All @@ -159,7 +159,7 @@ func (m *MpsManager) IsControlDaemonStarted(ctx context.Context, claim *nascrd.C

func (m *MpsManager) IsControlDaemonStopped(ctx context.Context, claim *nascrd.ClaimInfo) (bool, error) {
name := fmt.Sprintf(MpsControlDaemonNameFmt, claim.UID)
_, err := m.config.clientset.Core.AppsV1().Deployments(m.config.nascrd.Namespace).Get(ctx, name, metav1.GetOptions{})
_, err := m.config.clientsets.Core.AppsV1().Deployments(m.config.nascr.Namespace).Get(ctx, name, metav1.GetOptions{})
if errors.IsNotFound(err) {
return true, nil
}
Expand Down Expand Up @@ -263,7 +263,7 @@ func (m *MpsControlDaemon) Start(ctx context.Context) error {
}
}

_, err = m.manager.config.clientset.Core.AppsV1().Deployments(m.namespace).Create(ctx, &deployment, metav1.CreateOptions{})
_, err = m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Create(ctx, &deployment, metav1.CreateOptions{})
if errors.IsAlreadyExists(err) {
return nil
}
Expand All @@ -289,7 +289,7 @@ func (m *MpsControlDaemon) AssertReady(ctx context.Context) error {
return true
},
func() error {
deployment, err := m.manager.config.clientset.Core.AppsV1().Deployments(m.namespace).Get(
deployment, err := m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Get(
ctx,
m.name,
metav1.GetOptions{},
Expand All @@ -304,7 +304,7 @@ func (m *MpsControlDaemon) AssertReady(ctx context.Context) error {

selector := deployment.Spec.Selector.MatchLabels

pods, err := m.manager.config.clientset.Core.CoreV1().Pods(m.namespace).List(
pods, err := m.manager.config.clientsets.Core.CoreV1().Pods(m.namespace).List(
ctx,
metav1.ListOptions{
LabelSelector: labels.Set(selector).AsSelector().String(),
Expand Down Expand Up @@ -366,7 +366,7 @@ func (m *MpsControlDaemon) Stop(ctx context.Context) error {
PropagationPolicy: &deletePolicy,
}

err = m.manager.config.clientset.Core.AppsV1().Deployments(m.namespace).Delete(ctx, m.name, deleteOptions)
err = m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Delete(ctx, m.name, deleteOptions)
if err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("failed to delete deployment: %w", err)
}
Expand Down