From 8a380e52c48bf902a9a9fe68fca49475d8f06fdd Mon Sep 17 00:00:00 2001 From: Jose Diaz-Gonzalez Date: Tue, 14 Apr 2020 16:14:37 -0400 Subject: [PATCH] feat: enable limiting and reserving gpu resources --- docs/advanced-usage/resource-management.md | 13 +++++++++++++ docs/advanced-usage/schedulers/docker-local.md | 2 ++ plugins/resource/resource.go | 1 + plugins/resource/src/subcommands/subcommands.go | 4 ++++ plugins/resource/subcommands.go | 3 +++ plugins/resource/triggers.go | 5 +++++ tests/unit/20_resource.bats | 4 ---- 7 files changed, 28 insertions(+), 4 deletions(-) diff --git a/docs/advanced-usage/resource-management.md b/docs/advanced-usage/resource-management.md index 819a435d7..a0197748e 100644 --- a/docs/advanced-usage/resource-management.md +++ b/docs/advanced-usage/resource-management.md @@ -26,6 +26,7 @@ Valid resource options include: - `--network` - `--network-ingress` - `--network-egress` +- `--nvidia-gpu` See the [Supported Resource Management Properties](/docs/advanced-usage/schedulers/docker-local.md#supported-resource-management-properties) section of the docker local scheduler documentation for more information on how each resource limit maps to Docker. @@ -86,6 +87,7 @@ dokku resource:limit node-js-app network: 100 network-ingress: network-egress: + nvidia-gpu: ``` This may also be combined with the `--process-type` flag to see app limits on a process-type level. Note that the displayed values are not merged with the defaults. @@ -103,6 +105,7 @@ dokku resource:limit --process-type web node-js-app network: network-ingress: network-egress: + nvidia-gpu: ``` #### Clearing Resource Limits @@ -182,6 +185,7 @@ dokku resource:reserve node-js-app network: network-ingress: network-egress: + nvidia-gpu: ``` This may also be combined with the `--process-type` flag to see app reservations on a process-type level. Note that the displayed values are not merged with the defaults. 
@@ -198,6 +202,7 @@ dokku resource:reserve --process-type web node-js-app network: network-ingress: network-egress: + nvidia-gpu: ``` #### Clearing Resource Reservations @@ -238,12 +243,14 @@ dokku resource:report web limit network: 10 web limit network ingress: web limit network egress: + web limit nvidia gpu: web reservation cpu: web reservation memory: 512 web reservation memory swap: web reservation network: 8 web reservation network ingress: web reservation network egress: + web reservation nvidia gpu: =====> python-sample resource information web limit cpu: web limit memory: @@ -251,12 +258,14 @@ dokku resource:report web limit network: web limit network ingress: web limit network egress: + web limit nvidia gpu: web reservation cpu: web reservation memory: web reservation memory swap: web reservation network: web reservation network ingress: web reservation network egress: + web reservation nvidia gpu: =====> ruby-sample resource information web limit cpu: web limit memory: @@ -264,12 +273,14 @@ dokku resource:report web limit network: web limit network ingress: web limit network egress: + web limit nvidia gpu: web reservation cpu: web reservation memory: web reservation memory swap: web reservation network: web reservation network ingress: web reservation network egress: + web reservation nvidia gpu: ``` You can run the command for a specific app also. @@ -286,12 +297,14 @@ dokku resource:report node-js-app web limit network: 10 web limit network ingress: web limit network egress: + web limit nvidia gpu: web reservation cpu: web reservation memory: 512 web reservation memory swap: web reservation network: 8 web reservation network ingress: web reservation network egress: + web reservation nvidia gpu: ``` You can pass flags which will output only the value of the specific information you want. 
For example: diff --git a/docs/advanced-usage/schedulers/docker-local.md b/docs/advanced-usage/schedulers/docker-local.md index 73dd41061..92604da41 100644 --- a/docs/advanced-usage/schedulers/docker-local.md +++ b/docs/advanced-usage/schedulers/docker-local.md @@ -71,6 +71,8 @@ The `docker-local` scheduler supports a minimal list of resource _limits_ and _r - See the ["Memory" section](https://docs.docker.com/config/containers/resource_constraints/#memory) of the Docker Runtime Options documentation for more information. - memory-swap: (docker option: `--memory-swap`) and should be specified with a suffix of `b` (bytes), `k` (kilobytes), `m` (megabytes), `g` (gigabytes) - See the ["Memory" section](https://docs.docker.com/config/containers/resource_constraints/#memory) of the Docker Runtime Options documentation for more information. +- nvidia-gpu: (docker option: `--gpus`) and should be specified as the number of Nvidia GPUs a process can access. + - See the ["GPU" section](https://docs.docker.com/config/containers/resource_constraints/#gpu) of the Docker Runtime Options documentation for more information. 
### Resource Reservations diff --git a/plugins/resource/resource.go b/plugins/resource/resource.go index 9888b488e..04fcef4b3 100644 --- a/plugins/resource/resource.go +++ b/plugins/resource/resource.go @@ -15,6 +15,7 @@ type Resource struct { Network string `json:"network"` NetworkIngress string `json:"network-ingress"` NetworkEgress string `json:"network-egress"` + NvidiaGPU string `json:"nvidia-gpu"` } // ReportSingleApp is an internal function that displays the app report for one or more apps diff --git a/plugins/resource/src/subcommands/subcommands.go b/plugins/resource/src/subcommands/subcommands.go index c3b105fa9..713fa04c9 100644 --- a/plugins/resource/src/subcommands/subcommands.go +++ b/plugins/resource/src/subcommands/subcommands.go @@ -26,6 +26,7 @@ func main() { network := args.String("network", "", "network: The amount of network bandwidth to limit processes by") networkIngress := args.String("network-ingress", "", "network-ingress: The amount of ingress network bandwidth to limit processes by") networkEgress := args.String("network-egress", "", "network-egress: The amount of egress network bandwidth to limit processes by") + nvidiaGpu := args.String("nvidia-gpu", "", "nvidia-gpu: The number of Nvidia GPUs to limit a process to") args.Parse(os.Args[2:]) resources := resource.Resource{ @@ -35,6 +36,7 @@ func main() { Network: *network, NetworkIngress: *networkIngress, NetworkEgress: *networkEgress, + NvidiaGPU: *nvidiaGpu, } err = resource.CommandLimit(args.Args(), *processType, resources) @@ -57,6 +59,7 @@ func main() { network := args.String("network", "", "network: The amount of network bandwidth to reserve for processes") networkIngress := args.String("network-ingress", "", "network-ingress: The amount of ingress network bandwidth to reserve for processes") networkEgress := args.String("network-egress", "", "network-egress: The amount of egress network bandwidth to reserve for processes") + nvidiaGpu := args.String("nvidia-gpu", "", "nvidia-gpu: 
The number of Nvidia GPUs to reserve for a process") args.Parse(os.Args[2:]) resources := resource.Resource{ @@ -66,6 +69,7 @@ func main() { Network: *network, NetworkIngress: *networkIngress, NetworkEgress: *networkEgress, + NvidiaGPU: *nvidiaGpu, } err = resource.CommandReserve(args.Args(), *processType, resources) diff --git a/plugins/resource/subcommands.go b/plugins/resource/subcommands.go index 4366f021e..8107dceaf 100644 --- a/plugins/resource/subcommands.go +++ b/plugins/resource/subcommands.go @@ -100,6 +100,7 @@ func clearByResourceType(appName string, processType string, resourceType string "network", "network-ingress", "network-egress", + "nvidia-gpu", } for _, key := range resources { @@ -121,6 +122,7 @@ func setResourceType(appName string, processType string, r Resource, resourceTyp "network": r.Network, "network-ingress": r.NetworkIngress, "network-egress": r.NetworkEgress, + "nvidia-gpu": r.NvidiaGPU, } hasValues := false @@ -182,6 +184,7 @@ func reportResourceType(appName string, processType string, resourceType string) "network", "network-ingress", "network-egress", + "nvidia-gpu", } for _, key := range resources { diff --git a/plugins/resource/triggers.go b/plugins/resource/triggers.go index 1714c8a6e..01db7f0d1 100644 --- a/plugins/resource/triggers.go +++ b/plugins/resource/triggers.go @@ -32,6 +32,7 @@ func TriggerDockerArgsProcessDeploy(appName string, processType string) error { validLimits := map[string]bool{ "cpu": true, + "nvidia-gpu": true, "memory": true, "memory-swap": true, } @@ -54,6 +55,10 @@ func TriggerDockerArgsProcessDeploy(appName string, processType string) error { parts[1] = "cpus" } + if parts[1] == "nvidia-gpu" { + parts[1] = "gpus" + } + limits[parts[1]] = value } if parts[0] == "reserve" { diff --git a/tests/unit/20_resource.bats b/tests/unit/20_resource.bats index 4c2a13c4f..5a3e61571 100644 --- a/tests/unit/20_resource.bats +++ b/tests/unit/20_resource.bats @@ -141,10 +141,6 @@ teardown() { echo "output: $output" echo 
"status: $status" assert_output "536870912" - # run /bin/bash -c "docker inspect --format '{{.HostConfig.NanoCpus}}' $CID" - # echo "output: $output" - # echo "status: $status" - # assert_output "500000000" run /bin/bash -c "dokku resource:reserve-clear --process-type worker $TEST_APP" echo "output: $output"