diff --git a/api/v1alpha1/inferencemodel_types.go b/api/v1alpha1/inferencemodel_types.go index 63103181..dea51ba4 100644 --- a/api/v1alpha1/inferencemodel_types.go +++ b/api/v1alpha1/inferencemodel_types.go @@ -144,7 +144,7 @@ const ( // to exist at request time, the error is processed by the Inference Gateway // and emitted on the appropriate InferenceModel object. type TargetModel struct { - // Name is the name of the adapter as expected by the ModelServer. + // Name is the name of the adapter or base model, as expected by the ModelServer. // // +kubebuilder:validation:MaxLength=253 // +kubebuilder:validation:Required @@ -174,10 +174,54 @@ type TargetModel struct { // InferenceModelStatus defines the observed state of InferenceModel type InferenceModelStatus struct { - // Conditions track the state of the InferencePool. + // Conditions track the state of the InferenceModel. + // + // Known condition types are: + // + // * "Accepted" + // + // +optional + // +listType=map + // +listMapKey=type + // +kubebuilder:validation:MaxItems=8 + // +kubebuilder:default={{type: "Ready", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} Conditions []metav1.Condition `json:"conditions,omitempty"` } +// InferenceModelConditionType is a type of condition for the InferenceModel. +type InferenceModelConditionType string + +// InferenceModelConditionReason is the reason for a given InferenceModelConditionType. +type InferenceModelConditionReason string + +const ( + // This condition indicates if the model config is accepted, and if not, why. + // + // Possible reasons for this condition to be True are: + // + // * "Accepted" + // + // Possible reasons for this condition to be False are: + // + // * "ModelNameInUse" + // + // Possible reasons for this condition to be Unknown are: + // + // * "Pending" + // + ModelConditionAccepted InferenceModelConditionType = "Accepted" + + // Desired state. 
Model conforms to the state of the pool. + ModelReasonAccepted InferenceModelConditionReason = "Accepted" + + // This reason is used when a given ModelName already exists within the pool. + // Details about naming conflict resolution are on the ModelName field itself. + ModelReasonNameInUse InferenceModelConditionReason = "ModelNameInUse" + + // This reason is the initial state, and indicates that the controller has not yet reconciled the InferenceModel. + ModelReasonPending InferenceModelConditionReason = "Pending" +) + func init() { SchemeBuilder.Register(&InferenceModel{}, &InferenceModelList{}) } diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 852c7267..a9e15d31 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -68,6 +68,7 @@ type InferencePoolSpec struct { // // LabelKey is the key of a label. This is used for validation // of maps. This matches the Kubernetes "qualified name" validation that is used for labels. +// Labels are case-sensitive, so my-label and My-Label are considered distinct. // // Valid values include: // @@ -106,9 +107,52 @@ type LabelValue string // InferencePoolStatus defines the observed state of InferencePool type InferencePoolStatus struct { // Conditions track the state of the InferencePool. 
+ // + // Known condition types are: + // + // * "Ready" + // + // +optional + // +listType=map + // +listMapKey=type + // +kubebuilder:validation:MaxItems=8 + // +kubebuilder:default={{type: "Ready", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} Conditions []metav1.Condition `json:"conditions,omitempty"` } +// InferencePoolConditionType is a type of condition for the InferencePool. +type InferencePoolConditionType string + +// InferencePoolConditionReason is the reason for a given InferencePoolConditionType. +type InferencePoolConditionReason string + +const ( + // This condition indicates if the pool is ready to accept traffic, and if not, why. + // + // Possible reasons for this condition to be True are: + // + // * "Ready" + // + // Possible reasons for this condition to be False are: + // + // * "EndpointPickerNotHealthy" + // + // Possible reasons for this condition to be Unknown are: + // + // * "Pending" + // + PoolConditionReady InferencePoolConditionType = "Ready" + + // Desired state. The pool and its components are initialized and ready for traffic. + PoolReasonReady InferencePoolConditionReason = "Ready" + + // This reason is used when the Endpoint Picker (EPP) has not yet passed health checks, or has started failing them. + PoolReasonEPPNotHealthy InferencePoolConditionReason = "EndpointPickerNotHealthy" + + // This reason is the initial state, and indicates that the controller has not yet reconciled this pool. 
+ PoolReasonPending InferencePoolConditionReason = "Pending" +) + func init() { SchemeBuilder.Register(&InferencePool{}, &InferencePoolList{}) } diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml index 7fe1d561..ffdceddb 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml @@ -116,8 +116,8 @@ spec: and emitted on the appropriate InferenceModel object. properties: name: - description: Name is the name of the adapter as expected by - the ModelServer. + description: Name is the name of the adapter or base model, + as expected by the ModelServer. maxLength: 253 type: string weight: @@ -154,7 +154,18 @@ spec: description: InferenceModelStatus defines the observed state of InferenceModel properties: conditions: - description: Conditions track the state of the InferencePool. + default: + - lastTransitionTime: "1970-01-01T00:00:00Z" + message: Waiting for controller + reason: Pending + status: Unknown + type: Ready + description: |- + Conditions track the state of the InferenceModel. + + Known condition types are: + + * "Accepted" items: description: Condition contains details for one aspect of the current state of this API Resource. 
@@ -209,7 +220,11 @@ spec: - status - type type: object + maxItems: 8 type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index d4500a13..de5f40bc 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -81,7 +81,18 @@ spec: description: InferencePoolStatus defines the observed state of InferencePool properties: conditions: - description: Conditions track the state of the InferencePool. + default: + - lastTransitionTime: "1970-01-01T00:00:00Z" + message: Waiting for controller + reason: Pending + status: Unknown + type: Ready + description: |- + Conditions track the state of the InferencePool. + + Known condition types are: + + * "Ready" items: description: Condition contains details for one aspect of the current state of this API Resource. @@ -136,7 +147,11 @@ spec: - status - type type: object + maxItems: 8 type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true