diff --git a/api/groupversion_info.go b/api/groupversion_info.go new file mode 100644 index 00000000..7ff9c399 --- /dev/null +++ b/api/groupversion_info.go @@ -0,0 +1,45 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1alpha1 contains API Schema definitions for the gateway v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=inference.networking.x-k8s.io +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "inference.networking.x-k8s.io", Version: "v1alpha1"} + + // SchemeGroupVersion is alias to GroupVersion for client-go libraries. + // It is required by pkg/client/informers/externalversions/... + SchemeGroupVersion = GroupVersion + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) + +// Resource is required by pkg/client/listers/... 
+func Resource(resource string) schema.GroupResource { + return GroupVersion.WithResource(resource).GroupResource() +} diff --git a/api/inferencemodel_types.go b/api/inferencemodel_types.go new file mode 100644 index 00000000..766ecfef --- /dev/null +++ b/api/inferencemodel_types.go @@ -0,0 +1,167 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +// InferenceModelSpec represents a specific model use case. This resource is +// managed by the "Inference Workload Owner" persona. +// +// The Inference Workload Owner persona is: a team that trains, verifies, and +// leverages a large language model from a model frontend, drives the lifecycle +// and rollout of new versions of those models, and defines the specific +// performance and latency goals for the model. These workloads are +// expected to operate within an InferencePool sharing compute capacity with other +// InferenceModels, defined by the Inference Platform Admin. +// +// InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool, +// if the name is reused, an error will be shown on the status of a +// InferenceModel that attempted to reuse. The oldest InferenceModel, based on +// creation timestamp, will be selected to remain valid. 
In the event of a race +// condition, one will be selected at random. +type InferenceModelSpec struct { + // The name of the model as the users set in the "model" parameter in the requests. + // The name should be unique among the workloads that reference the same backend pool. + // This is the parameter that will be used to match the request with. In the future, we may + // allow to match on other request parameters. The other approach to support matching on + // other request parameters is to use a different ModelName per HTTPFilter. + // Names can be reserved without implementing an actual model in the pool. + // This can be done by specifying a target model and setting the weight to zero, + // an error will be returned specifying that no valid target model is found. + // + // +optional + // +kubebuilder:validation:MaxLength=253 + ModelName string `json:"modelName,omitempty"` + // Defines how important it is to serve the model compared to other models referencing the same pool. + // + // +optional + // +kubebuilder:default="Default" + Criticality *Criticality `json:"criticality,omitempty"` + // Allow multiple versions of a model for traffic splitting. + // If not specified, the target model name is defaulted to the modelName parameter. + // modelName is often in reference to a LoRA adapter. + // + // +optional + // +kubebuilder:validation:MaxItems=10 + TargetModels []TargetModel `json:"targetModels,omitempty"` + // Reference to the inference pool, the pool must exist in the same namespace. + // + // +kubebuilder:validation:Required + PoolRef PoolObjectReference `json:"poolRef"` +} + +// PoolObjectReference identifies an API object within the namespace of the +// referrer. +type PoolObjectReference struct { + // Group is the group of the referent. 
+ // + // +optional + // +kubebuilder:default="inference.networking.x-k8s.io" + // +kubebuilder:validation:MaxLength=253 + // +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$` + Group string `json:"group,omitempty"` + + // Kind is kind of the referent. For example "InferencePool". + // + // +optional + // +kubebuilder:default="InferencePool" + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=63 + // +kubebuilder:validation:Pattern=`^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$` + Kind string `json:"kind,omitempty"` + + // Name is the name of the referent. + // + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=253 + // +kubebuilder:validation:Required + Name string `json:"name,omitempty"` +} + +// Defines how important it is to serve the model compared to other models. +// +kubebuilder:validation:Enum=Critical;Default;Sheddable +type Criticality string + +const ( + // Most important. Requests to this band will be shed last. + Critical Criticality = "Critical" + // More important than Sheddable, less important than Critical. + // Requests in this band will be shed before critical traffic. + // +kubebuilder:default=Default + Default Criticality = "Default" + // Least important. Requests to this band will be shed before all other bands. + Sheddable Criticality = "Sheddable" +) + +// TargetModel represents a deployed model or a LoRA adapter. The +// Name field is expected to match the name of the LoRA adapter +// (or base model) as it is registered within the model server. Inference +// Gateway assumes that the model exists on the model server and is the +// responsibility of the user to validate a correct match. Should a model fail +// to exist at request time, the error is processed by the Instance Gateway, +// and then emitted on the appropriate InferenceModel object. +type TargetModel struct { + // The name of the adapter as expected by the ModelServer. 
+ // + // +optional + // +kubebuilder:validation:MaxLength=253 + Name string `json:"name,omitempty"` + // Weight is used to determine the proportion of traffic that should be + // sent to this target model when multiple versions of the model are specified. + // + // +optional + // +kubebuilder:default=1 + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=1000000 + Weight int32 `json:"weight,omitempty"` +} + +// InferenceModelStatus defines the observed state of InferenceModel +type InferenceModelStatus struct { + // Conditions track the state of the InferenceModel. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +genclient + +// InferenceModel is the Schema for the InferenceModels API +type InferenceModel struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec InferenceModelSpec `json:"spec,omitempty"` + Status InferenceModelStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// InferenceModelList contains a list of InferenceModel +type InferenceModelList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []InferenceModel `json:"items"` +} + +func init() { + SchemeBuilder.Register(&InferenceModel{}, &InferenceModelList{}) +} diff --git a/api/inferencepool_types.go b/api/inferencepool_types.go new file mode 100644 index 00000000..d7559c3f --- /dev/null +++ b/api/inferencepool_types.go @@ -0,0 +1,119 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +// InferencePoolSpec defines the desired state of InferencePool +type InferencePoolSpec struct { + + // Selector uses a map of labels to watch model server pods + // that should be included in the InferencePool. ModelServers should not + // be shared with any other Service or InferencePool, that behavior is not supported + // and will result in sub-optimal utilization. + // In some cases, implementations may translate this to a Service selector, so this matches the simple + // map used for Service selectors instead of the full Kubernetes LabelSelector type. + // + // +kubebuilder:validation:Required + Selector map[LabelKey]LabelValue `json:"selector,omitempty"` + + // TargetPortNumber is the port number that the model servers within the pool expect + // to receive traffic from. + // This maps to the TargetPort in: https://pkg.go.dev/k8s.io/api/core/v1#ServicePort + // + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=65535 + // +kubebuilder:validation:Required + TargetPortNumber int32 `json:"targetPortNumber,omitempty"` +} + +// Originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731 +// Duplicated as to not take an unexpected dependency on gw's API. +// +// LabelKey is the key of a label. This is used for validation +// of maps. 
This matches the Kubernetes "qualified name" validation that is used for labels. +// +// Valid values include: +// +// * example +// * example.com +// * example.com/path +// * example.com/path.html +// +// Invalid values include: +// +// * example~ - "~" is an invalid character +// * example.com. - can not start or end with "." +// +// +kubebuilder:validation:MinLength=1 +// +kubebuilder:validation:MaxLength=253 +// +kubebuilder:validation:Pattern=`^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9]$` +type LabelKey string + +// LabelValue is the value of a label. This is used for validation +// of maps. This matches the Kubernetes label validation rules: +// * must be 63 characters or less (can be empty), +// * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]), +// * could contain dashes (-), underscores (_), dots (.), and alphanumerics between. +// +// Valid values include: +// +// * MyValue +// * my.name +// * 123-my-value +// +// +kubebuilder:validation:MinLength=0 +// +kubebuilder:validation:MaxLength=63 +// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$` +type LabelValue string + +// InferencePoolStatus defines the observed state of InferencePool +type InferencePoolStatus struct { + + // Conditions track the state of the InferencePool. 
+ Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +genclient + +// InferencePool is the Schema for the Inferencepools API +type InferencePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec InferencePoolSpec `json:"spec,omitempty"` + Status InferencePoolStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// InferencePoolList contains a list of InferencePool +type InferencePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []InferencePool `json:"items"` +} + +func init() { + SchemeBuilder.Register(&InferencePool{}, &InferencePoolList{}) +} diff --git a/api/zz_generated.deepcopy.go b/api/zz_generated.deepcopy.go new file mode 100644 index 00000000..4f17fbd0 --- /dev/null +++ b/api/zz_generated.deepcopy.go @@ -0,0 +1,266 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InferenceModel) DeepCopyInto(out *InferenceModel) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModel. +func (in *InferenceModel) DeepCopy() *InferenceModel { + if in == nil { + return nil + } + out := new(InferenceModel) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *InferenceModel) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceModelList) DeepCopyInto(out *InferenceModelList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]InferenceModel, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelList. +func (in *InferenceModelList) DeepCopy() *InferenceModelList { + if in == nil { + return nil + } + out := new(InferenceModelList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *InferenceModelList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InferenceModelSpec) DeepCopyInto(out *InferenceModelSpec) { + *out = *in + if in.Criticality != nil { + in, out := &in.Criticality, &out.Criticality + *out = new(Criticality) + **out = **in + } + if in.TargetModels != nil { + in, out := &in.TargetModels, &out.TargetModels + *out = make([]TargetModel, len(*in)) + copy(*out, *in) + } + out.PoolRef = in.PoolRef +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelSpec. +func (in *InferenceModelSpec) DeepCopy() *InferenceModelSpec { + if in == nil { + return nil + } + out := new(InferenceModelSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceModelStatus) DeepCopyInto(out *InferenceModelStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelStatus. +func (in *InferenceModelStatus) DeepCopy() *InferenceModelStatus { + if in == nil { + return nil + } + out := new(InferenceModelStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferencePool) DeepCopyInto(out *InferencePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePool. 
+func (in *InferencePool) DeepCopy() *InferencePool { + if in == nil { + return nil + } + out := new(InferencePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *InferencePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferencePoolList) DeepCopyInto(out *InferencePoolList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]InferencePool, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolList. +func (in *InferencePoolList) DeepCopy() *InferencePoolList { + if in == nil { + return nil + } + out := new(InferencePoolList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *InferencePoolList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { + *out = *in + if in.Selector != nil { + in, out := &in.Selector, &out.Selector + *out = make(map[LabelKey]LabelValue, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. 
+func (in *InferencePoolSpec) DeepCopy() *InferencePoolSpec { + if in == nil { + return nil + } + out := new(InferencePoolSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferencePoolStatus) DeepCopyInto(out *InferencePoolStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolStatus. +func (in *InferencePoolStatus) DeepCopy() *InferencePoolStatus { + if in == nil { + return nil + } + out := new(InferencePoolStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PoolObjectReference) DeepCopyInto(out *PoolObjectReference) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolObjectReference. +func (in *PoolObjectReference) DeepCopy() *PoolObjectReference { + if in == nil { + return nil + } + out := new(PoolObjectReference) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TargetModel) DeepCopyInto(out *TargetModel) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TargetModel. +func (in *TargetModel) DeepCopy() *TargetModel { + if in == nil { + return nil + } + out := new(TargetModel) + in.DeepCopyInto(out) + return out +}