forked from kubeflow/training-operator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
paddlepaddle_types.go
134 lines (115 loc) · 5.14 KB
/
paddlepaddle_types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// Copyright 2022 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v1
import (
autoscalingv2 "k8s.io/api/autoscaling/v2"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// Defaults applied to the containers and ports of a PaddleJob.
const (
	// PaddleJobDefaultContainerName is the name of the PaddleJob container.
	PaddleJobDefaultContainerName = "paddle"
	// PaddleJobDefaultPortName is the name of the port used for communication
	// between the Master and the workers.
	PaddleJobDefaultPortName = "master"
	// PaddleJobDefaultPort is the default value of that port.
	PaddleJobDefaultPort = 36543
	// PaddleJobDefaultRestartPolicy is the default RestartPolicy for a
	// PaddleReplicaSpec.
	PaddleJobDefaultRestartPolicy = RestartPolicyOnFailure
)

// Names identifying the PaddleJob resource and its framework.
const (
	// PaddleJobKind is the kind name.
	PaddleJobKind = "PaddleJob"
	// PaddleJobSingular is the singular resource name for PaddleJob.
	PaddleJobSingular = "paddlejob"
	// PaddleJobPlural is the plural resource name for PaddleJob.
	PaddleJobPlural = "paddlejobs"
	// PaddleJobFrameworkName is the name of the ML framework.
	PaddleJobFrameworkName = "paddle"
)

// Replica types used in a distributed Paddle job.
const (
	// PaddleJobReplicaTypeMaster is the replica type of the Master.
	PaddleJobReplicaTypeMaster ReplicaType = "Master"
	// PaddleJobReplicaTypeWorker is the replica type of the workers.
	PaddleJobReplicaTypeWorker ReplicaType = "Worker"
)
// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +resource:path=paddlejob
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.conditions[-1:].type`
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
// +kubebuilder:subresource:scale:specpath=.spec.paddleReplicaSpecs.Worker.replicas,statuspath=.status.replicaStatuses.Worker.active,selectorpath=.status.replicaStatuses.Worker.selector

// PaddleJob represents a PaddleJob resource.
type PaddleJob struct {
// Standard Kubernetes type metadata.
metav1.TypeMeta `json:",inline"`
// Standard Kubernetes object metadata.
metav1.ObjectMeta `json:"metadata,omitempty"`
// Specification of the desired state of the PaddleJob.
Spec PaddleJobSpec `json:"spec,omitempty"`
// Most recently observed status of the PaddleJob.
// Read-only (modified by the system).
Status JobStatus `json:"status,omitempty"`
}
// PaddleJobSpec is a desired state description of the PaddleJob.
type PaddleJobSpec struct {
// RunPolicy encapsulates various runtime policies of the distributed training
// job, for example how to clean up resources and how long the job can stay
// active.
// +kubebuilder:validation:Optional
RunPolicy RunPolicy `json:"runPolicy"`
// ElasticPolicy holds the elastic policy for the paddle job.
// The field is a pointer and is omitted from the JSON encoding when nil.
ElasticPolicy *PaddleElasticPolicy `json:"elasticPolicy,omitempty"`
// PaddleReplicaSpecs is a map of ReplicaType (key) to ReplicaSpec (value).
// It specifies the Paddle cluster configuration.
// For example,
// {
// "Master": PaddleReplicaSpec,
// "Worker": PaddleReplicaSpec,
// }
PaddleReplicaSpecs map[ReplicaType]*ReplicaSpec `json:"paddleReplicaSpecs"`
}
// PaddleElasticPolicy holds the elastic policy for a paddle job: replica
// bounds, a restart limit, and the metrics used to compute the desired
// replica count. Every field is optional.
type PaddleElasticPolicy struct {
// MinReplicas is the lower limit for the number of replicas to which the training job
// can scale down. It defaults to null.
// +optional
MinReplicas *int32 `json:"minReplicas,omitempty"`
// MaxReplicas is the upper limit for the number of pods that can be set by
// the autoscaler; it cannot be smaller than MinReplicas. It defaults to null.
// +optional
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
// MaxRestarts is the limit for restart times of pods in elastic mode.
// +optional
MaxRestarts *int32 `json:"maxRestarts,omitempty"`
// Metrics contains the specifications which are used to calculate the
// desired replica count (the maximum replica count across all metrics will
// be used). The desired replica count is calculated by multiplying the
// ratio between the target value and the current value by the current
// number of pods. Ergo, metrics used must decrease as the pod count is
// increased, and vice-versa. See the individual metric source types for
// more information about how each type of metric must respond.
// If not set, the HPA will not be created.
// +optional
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +resource:path=paddlejobs
// +kubebuilder:object:root=true

// PaddleJobList is a list of PaddleJobs.
type PaddleJobList struct {
// Standard type metadata.
metav1.TypeMeta `json:",inline"`
// Standard list metadata.
metav1.ListMeta `json:"metadata,omitempty"`
// List of PaddleJobs.
Items []PaddleJob `json:"items"`
}
func init() {
SchemeBuilder.Register(&PaddleJob{}, &PaddleJobList{})
SchemeBuilder.SchemeBuilder.Register(addPaddleDefaultingFuncs)
}