diff --git a/cmd/virt-controller/main.go b/cmd/virt-controller/main.go index d4a76f7..ff36a3f 100644 --- a/cmd/virt-controller/main.go +++ b/cmd/virt-controller/main.go @@ -4,6 +4,7 @@ import ( "flag" "os" + netv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -27,6 +28,7 @@ func init() { utilruntime.Must(virtv1alpha1.AddToScheme(scheme)) utilruntime.Must(cdiv1beta1.AddToScheme(scheme)) + utilruntime.Must(netv1.AddToScheme(scheme)) } // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete diff --git a/cmd/virt-prerunner/main.go b/cmd/virt-prerunner/main.go index 34c5ac3..5bff440 100644 --- a/cmd/virt-prerunner/main.go +++ b/cmd/virt-prerunner/main.go @@ -17,6 +17,7 @@ import ( "github.com/docker/libnetwork/resolvconf" "github.com/docker/libnetwork/types" + netv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" "github.com/subgraph/libmacouflage" "github.com/vishvananda/netlink" @@ -68,6 +69,13 @@ func main() { } } + if len(vmConfig.Devices) > 0 { + cloudHypervisorCmd = append(cloudHypervisorCmd, "--device") + for _, device := range vmConfig.Devices { + cloudHypervisorCmd = append(cloudHypervisorCmd, fmt.Sprintf("id=%s,path=%s", device.Id, device.Path)) + } + } + fmt.Println(strings.Join(cloudHypervisorCmd, " ")) } @@ -137,29 +145,48 @@ func buildVMConfig(ctx context.Context, vm *virtv1alpha1.VirtualMachine) (*cloud } } + networkStatusList := []netv1.NetworkStatus{} + if os.Getenv("NETWORK_STATUS") != "" { + if err := json.Unmarshal([]byte(os.Getenv("NETWORK_STATUS")), &networkStatusList); err != nil { + return nil, err + } + } + for _, iface := range vm.Spec.Instance.Interfaces { for networkIndex, network := range vm.Spec.Networks { - if network.Name == iface.Name { + if network.Name 
!= iface.Name { + continue + } + + var linkName string + switch { + case network.Pod != nil: + linkName = "eth0" + case network.Multus != nil: + linkName = fmt.Sprintf("net%d", networkIndex) + default: + return nil, fmt.Errorf("invalid source of network %q", network.Name) + } + + switch { + case iface.Bridge != nil: netConfig := cloudhypervisor.NetConfig{ Id: iface.Name, } - - var linkName string - switch { - case network.Pod != nil: - linkName = "eth0" - case network.Multus != nil: - linkName = fmt.Sprintf("net%d", networkIndex) - default: - return nil, fmt.Errorf("invalid source of network %q", network.Name) - } - if err := setupBridgeNetwork(linkName, fmt.Sprintf("169.254.%d.1/30", 200+networkIndex), &netConfig); err != nil { return nil, fmt.Errorf("setup bridge network: %s", err) } - vmConfig.Net = append(vmConfig.Net, &netConfig) - break + case iface.SRIOV != nil: + for _, networkStatus := range networkStatusList { + if networkStatus.Interface == linkName && networkStatus.DeviceInfo != nil && networkStatus.DeviceInfo.Pci != nil { + sriovDeviceConfig := cloudhypervisor.DeviceConfig{ + Id: iface.Name, + Path: fmt.Sprintf("/sys/bus/pci/devices/%s", networkStatus.DeviceInfo.Pci.PciAddress), + } + vmConfig.Devices = append(vmConfig.Devices, &sriovDeviceConfig) + } + } } } } diff --git a/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml b/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml index 25ba5ff..400d6bd 100644 --- a/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml +++ b/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml @@ -886,8 +886,12 @@ spec: interfaces: items: properties: + bridge: + type: object name: type: string + sriov: + type: object required: - name type: object diff --git a/deploy/virt-controller/role.yaml b/deploy/virt-controller/role.yaml index ab16ad9..771e7c3 100644 --- a/deploy/virt-controller/role.yaml +++ b/deploy/virt-controller/role.yaml @@ -65,6 +65,14 @@ rules: - patch - update - watch +- apiGroups: + - 
k8s.cni.cncf.io + resources: + - network-attachment-definitions + verbs: + - get + - list + - watch - apiGroups: - virt.virtink.smartx.com resources: diff --git a/pkg/apis/virt/v1alpha1/types.go b/pkg/apis/virt/v1alpha1/types.go index fc3bdee..9403d17 100644 --- a/pkg/apis/virt/v1alpha1/types.go +++ b/pkg/apis/virt/v1alpha1/types.go @@ -80,7 +80,19 @@ type Disk struct { } type Interface struct { - Name string `json:"name"` + Name string `json:"name"` + InterfaceBindingMethod `json:",inline"` +} + +type InterfaceBindingMethod struct { + Bridge *InterfaceBridge `json:"bridge,omitempty"` + SRIOV *InterfaceSRIOV `json:"sriov,omitempty"` +} + +type InterfaceBridge struct { +} + +type InterfaceSRIOV struct { } type Volume struct { diff --git a/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go index 77a72ff..22fb66d 100644 --- a/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go @@ -132,7 +132,9 @@ func (in *Instance) DeepCopyInto(out *Instance) { if in.Interfaces != nil { in, out := &in.Interfaces, &out.Interfaces *out = make([]Interface, len(*in)) - copy(*out, *in) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } return } @@ -150,6 +152,7 @@ func (in *Instance) DeepCopy() *Instance { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Interface) DeepCopyInto(out *Interface) { *out = *in + in.InterfaceBindingMethod.DeepCopyInto(&out.InterfaceBindingMethod) return } @@ -163,6 +166,64 @@ func (in *Interface) DeepCopy() *Interface { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InterfaceBindingMethod) DeepCopyInto(out *InterfaceBindingMethod) { + *out = *in + if in.Bridge != nil { + in, out := &in.Bridge, &out.Bridge + *out = new(InterfaceBridge) + **out = **in + } + if in.SRIOV != nil { + in, out := &in.SRIOV, &out.SRIOV + *out = new(InterfaceSRIOV) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InterfaceBindingMethod. +func (in *InterfaceBindingMethod) DeepCopy() *InterfaceBindingMethod { + if in == nil { + return nil + } + out := new(InterfaceBindingMethod) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InterfaceBridge) DeepCopyInto(out *InterfaceBridge) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InterfaceBridge. +func (in *InterfaceBridge) DeepCopy() *InterfaceBridge { + if in == nil { + return nil + } + out := new(InterfaceBridge) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InterfaceSRIOV) DeepCopyInto(out *InterfaceSRIOV) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InterfaceSRIOV. +func (in *InterfaceSRIOV) DeepCopy() *InterfaceSRIOV { + if in == nil { + return nil + } + out := new(InterfaceSRIOV) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Kernel) DeepCopyInto(out *Kernel) { *out = *in diff --git a/pkg/controller/vm_controller.go b/pkg/controller/vm_controller.go index e2f2b39..3fd9d66 100644 --- a/pkg/controller/vm_controller.go +++ b/pkg/controller/vm_controller.go @@ -11,6 +11,7 @@ import ( netv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -39,6 +40,7 @@ type VMReconciler struct { // +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch // +kubebuilder:rbac:groups=cdi.kubevirt.io,resources=datavolumes,verbs=get;list;watch +// +kubebuilder:rbac:groups=k8s.cni.cncf.io,resources=network-attachment-definitions,verbs=get;list;watch func (r *VMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { var vm virtv1alpha1.VirtualMachine @@ -433,6 +435,28 @@ func (r *VMReconciler) buildVMPod(ctx context.Context, vm *virtv1alpha1.VirtualM Name: network.Multus.NetworkName, InterfaceRequest: fmt.Sprintf("net%d", i), }) + + var nad netv1.NetworkAttachmentDefinition + nadKey := types.NamespacedName{ + Name: network.Multus.NetworkName, + Namespace: vm.Namespace, + } + if err := r.Client.Get(ctx, nadKey, &nad); err != nil { + return nil, fmt.Errorf("get NAD: %s", err) + } + + resourceName := nad.Annotations["k8s.v1.cni.cncf.io/resourceName"] + if resourceName != "" { + incrementContainerResource(&vmPod.Spec.Containers[0], resourceName) + } + vmPod.Spec.Containers[0].Env = append(vmPod.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "NETWORK_STATUS", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: fmt.Sprintf("metadata.annotations['%s']", 
netv1.NetworkStatusAnnot),
+						},
+					},
+				})
 			default:
 				// ignored
 			}
@@ -472,6 +496,29 @@ func (r *VMReconciler) gcVMPods(ctx context.Context, vm *virtv1alpha1.VirtualMac
 	return nil
 }
 
+// incrementContainerResource adds one unit of the resource named resourceName
+// to both the requests and the limits of container, creating either resource
+// list first when it is nil.
+func incrementContainerResource(container *corev1.Container, resourceName string) {
+	name := corev1.ResourceName(resourceName)
+	one := *resource.NewQuantity(1, resource.DecimalSI)
+
+	if container.Resources.Requests == nil {
+		container.Resources.Requests = corev1.ResourceList{}
+	}
+	if container.Resources.Limits == nil {
+		container.Resources.Limits = corev1.ResourceList{}
+	}
+
+	// Map values are not addressable, so each quantity is copied out,
+	// incremented and stored back.
+	for _, list := range []corev1.ResourceList{container.Resources.Requests, container.Resources.Limits} {
+		quantity := list[name]
+		quantity.Add(one)
+		list[name] = quantity
+	}
+}
+
 func (r *VMReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	if err := mgr.GetFieldIndexer().IndexField(context.Background(), &corev1.Pod{}, "vmUID", func(obj client.Object) []string {
 		pod := obj.(*corev1.Pod)
diff --git a/pkg/controller/vm_webhook.go b/pkg/controller/vm_webhook.go
index 9609278..e03454b 100644
--- a/pkg/controller/vm_webhook.go
+++ b/pkg/controller/vm_webhook.go
@@ -76,6 +76,14 @@ func MutateVM(ctx context.Context, vm *virtv1alpha1.VirtualMachine, oldVM *virtv
 	if vm.Spec.Instance.Memory.Size == nil {
 		vm.Spec.Instance.Memory.Size = vm.Spec.Resources.Requests.Memory()
 	}
+
+	for i := range vm.Spec.Instance.Interfaces {
+		if vm.Spec.Instance.Interfaces[i].Bridge == nil && vm.Spec.Instance.Interfaces[i].SRIOV == nil {
+			vm.Spec.Instance.Interfaces[i].InterfaceBindingMethod = virtv1alpha1.InterfaceBindingMethod{
+				Bridge: &virtv1alpha1.InterfaceBridge{},
+			}
+		}
+	}
 	return nil
 }
 
@@ -277,6 +285,33 @@ func ValidateInterface(ctx context.Context, iface *virtv1alpha1.Interface, field
 	if iface.Name == "" {
 		errs = append(errs, 
field.Required(fieldPath.Child("name"), ""))
 	}
+	errs = append(errs, ValidateInterfaceBindingMethod(ctx, &iface.InterfaceBindingMethod, fieldPath)...)
+	return errs
+}
+
+// ValidateInterfaceBindingMethod checks that exactly one binding method is set
+// on bindingMethod and returns the violations found, rooted at fieldPath.
+func ValidateInterfaceBindingMethod(ctx context.Context, bindingMethod *virtv1alpha1.InterfaceBindingMethod, fieldPath *field.Path) field.ErrorList {
+	var errs field.ErrorList
+	if bindingMethod == nil {
+		errs = append(errs, field.Required(fieldPath, ""))
+		return errs
+	}
+
+	// Bridge is counted first, so it can never be the surplus method itself.
+	cnt := 0
+	if bindingMethod.Bridge != nil {
+		cnt++
+	}
+	if bindingMethod.SRIOV != nil {
+		cnt++
+		if cnt > 1 {
+			errs = append(errs, field.Forbidden(fieldPath.Child("sriov"), "may not specify more than 1 binding method"))
+		}
+	}
+	if cnt == 0 {
+		errs = append(errs, field.Required(fieldPath, "at least 1 binding method is required"))
+	}
 	return errs
 }
 
diff --git a/pkg/controller/vm_webhook_test.go b/pkg/controller/vm_webhook_test.go
index 07fda7c..1a6d5b4 100644
--- a/pkg/controller/vm_webhook_test.go
+++ b/pkg/controller/vm_webhook_test.go
@@ -26,6 +26,9 @@ func TestValidateVM(t *testing.T) {
 				}},
 				Interfaces: []virtv1alpha1.Interface{{
 					Name: "net-1",
+					InterfaceBindingMethod: virtv1alpha1.InterfaceBindingMethod{
+						Bridge: &virtv1alpha1.InterfaceBridge{},
+					},
 				}},
 			},
 			Volumes: []virtv1alpha1.Volume{{
@@ -100,6 +103,21 @@ func TestValidateVM(t *testing.T) {
 		}(),
 		invalidFields: []string{"spec.instance.interfaces[0].name"},
 	}, {
+		vm: func() *virtv1alpha1.VirtualMachine {
+			vm := validVM.DeepCopy()
+			vm.Spec.Instance.Interfaces[0].Bridge = nil
+			return vm
+		}(),
+		invalidFields: []string{"spec.instance.interfaces[0]"},
+	}, {
+		vm: func() *virtv1alpha1.VirtualMachine {
+			vm := validVM.DeepCopy()
+			vm.Spec.Instance.Interfaces[0].InterfaceBindingMethod.SRIOV = &virtv1alpha1.InterfaceSRIOV{}
+			return vm
+		}(),
+		invalidFields: []string{"spec.instance.interfaces[0].sriov"},
+	}, {
+		vm: func() 
*virtv1alpha1.VirtualMachine { vm := validVM.DeepCopy() vm.Spec.Volumes[0].Name = "" diff --git a/samples/ubuntu-sriov.yaml b/samples/ubuntu-sriov.yaml new file mode 100644 index 0000000..ffeae54 --- /dev/null +++ b/samples/ubuntu-sriov.yaml @@ -0,0 +1,64 @@ +apiVersion: virt.virtink.smartx.com/v1alpha1 +kind: VirtualMachine +metadata: + name: ubuntu-sriov +spec: + instance: + memory: + size: 1Gi + disks: + - name: ubuntu + - name: cloud-init + interfaces: + - name: pod + - name: sriov + sriov: {} + volumes: + - name: ubuntu + containerDisk: + image: smartxworks/virtink-container-disk-ubuntu + - name: cloud-init + cloudInit: + userData: |- + #cloud-config + password: password + chpasswd: { expire: False } + ssh_pwauth: True + networks: + - name: pod + pod: {} + - name: sriov + multus: + networkName: intel-10g-sriov-vfio +--- +apiVersion: k8s.cni.cncf.io/v1 +kind: NetworkAttachmentDefinition +metadata: + name: intel-10g-sriov-vfio + annotations: + k8s.v1.cni.cncf.io/resourceName: intel.com/intel_10g_sriov_vfio +spec: + config: | + { + "cniVersion": "0.3.1", + "type": "sriov" + } +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: sriovdp-config + namespace: kube-system +data: + config.json: | + { + "resourceList": [{ + "resourceName": "intel_10g_sriov_vfio", + "resourcePrefix": "intel.com", + "selectors": { + "vendors": ["8086"], + "devices": ["1520"], + "drivers": ["vfio-pci"] + } + }] + }