diff --git a/Taskfile.yml b/Taskfile.yml index 913878e0..edc845f3 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -45,6 +45,7 @@ tasks: desc: Cleans temp files and folders cmds: - rm -rf dist/ + - rm -rf tmp/ lint: desc: Runs golint @@ -52,9 +53,15 @@ tasks: - golint {{catLines .GO_PACKAGES}} silent: true + sleepit: + desc: Builds the sleepit test helper + dir: tmp + cmds: + - go build ../internal/sleepit + test: desc: Runs test suite - deps: [install] + deps: [install, sleepit] cmds: - go test {{catLines .GO_PACKAGES}} diff --git a/internal/sleepit/main.go b/internal/sleepit/main.go new file mode 100644 index 00000000..5186e478 --- /dev/null +++ b/internal/sleepit/main.go @@ -0,0 +1,177 @@ +// This code is released under the MIT License +// Copyright (c) 2020 Marco Molteni and the timeit contributors. + +package main + +import ( + "flag" + "fmt" + "os" + "os/signal" + "time" +) + +const usage = `sleepit: sleep for the specified duration, optionally handling signals +When the line "sleepit: ready" is printed, it means that it is safe to send signals to it + +Usage: sleepit [] + +Commands + + default Use default action: on reception of SIGINT terminate abruptly + handle Handle signals: on reception of SIGINT perform cleanup before exiting + version Show the sleepit version +` + +var ( + // Filled by the linker. + fullVersion = "unknown" // example: v0.0.9-8-g941583d027-dirty +) + +func main() { + os.Exit(run(os.Args[1:])) +} + +func run(args []string) int { + if len(args) < 1 { + fmt.Fprintln(os.Stderr, usage) + return 2 + } + + defaultCmd := flag.NewFlagSet("default", flag.ExitOnError) + defaultSleep := defaultCmd.Duration("sleep", 5*time.Second, "Sleep duration") + + handleCmd := flag.NewFlagSet("handle", flag.ExitOnError) + handleSleep := handleCmd.Duration("sleep", 5*time.Second, "Sleep duration") + handleCleanup := handleCmd.Duration("cleanup", 5*time.Second, "Cleanup duration") + handleTermAfter := handleCmd.Int("term-after", 0, + "Terminate immediately after `N` signals.\n"+ + "Default is to terminate only when the cleanup phase has completed.") + + versionCmd := flag.NewFlagSet("version", flag.ExitOnError) + + switch args[0] { + + case "default": + defaultCmd.Parse(args[1:]) + if len(defaultCmd.Args()) > 0 { + fmt.Fprintf(os.Stderr, "default: unexpected arguments: %v\n", defaultCmd.Args()) + return 2 + } + return supervisor(*defaultSleep, 0, 0, nil) + + case "handle": + handleCmd.Parse(args[1:]) + if *handleTermAfter == 1 { + fmt.Fprintf(os.Stderr, "handle: term-after cannot be 1\n") + return 2 + } + if len(handleCmd.Args()) > 0 { + fmt.Fprintf(os.Stderr, "handle: unexpected arguments: %v\n", handleCmd.Args()) + return 2 + } + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, os.Interrupt) // Ctrl-C -> SIGINT + return supervisor(*handleSleep, *handleCleanup, *handleTermAfter, sigCh) + + case "version": + versionCmd.Parse(args[1:]) + if len(versionCmd.Args()) > 0 { + fmt.Fprintf(os.Stderr, "version: unexpected arguments: %v\n", versionCmd.Args()) + return 2 + } + fmt.Printf("sleepit version %s\n", fullVersion) + return 0 + + default: + fmt.Fprintln(os.Stderr, usage) + return 2 + } +} + +func supervisor( + sleep time.Duration, + cleanup time.Duration, + termAfter int, + sigCh <-chan os.Signal, +) int { + fmt.Printf("sleepit: ready\n") + fmt.Printf("sleepit: PID=%d sleep=%v cleanup=%v\n", + os.Getpid(), sleep, cleanup) + + cancelWork := make(chan struct{}) + workerDone := worker(cancelWork, sleep, "work") + + cancelCleaner := make(chan struct{}) + var cleanerDone <-chan struct{} + + sigCount := 0 + for { + select { + case sig := <-sigCh: + sigCount++ + fmt.Printf("sleepit: got signal=%s count=%d\n", sig, sigCount) + if sigCount == 1 { + // since `cancelWork` is unbuffered, sending will be synchronous: + // we are ensured that the worker has terminated before starting cleanup. + // This is important in some real-life situations. + cancelWork <- struct{}{} + cleanerDone = worker(cancelCleaner, cleanup, "cleanup") + } + if sigCount == termAfter { + cancelCleaner <- struct{}{} + return 4 + } + case <-workerDone: + return 0 + case <-cleanerDone: + return 3 + } + } +} + +// Start a worker goroutine and return immediately a `workerDone` channel. +// The goroutine will prepend its prints with the prefix `name`. +// The goroutine will simulate some work and will terminate when one of the following +// conditions happens: +// 1. When `howlong` is elapsed. This case will be signaled on the `workerDone` channel. +// 2. When something happens on channel `canceled`. Note that this simulates real-life, +// so cancellation is not instantaneous: if the caller wants a synchronous cancel, +// it should send a message; if instead it wants an asynchronous cancel, it should +// close the channel. +func worker( + canceled <-chan struct{}, + howlong time.Duration, + name string, +) <-chan struct{} { + workerDone := make(chan struct{}) + deadline := time.Now().Add(howlong) + go func() { + fmt.Printf("sleepit: %s started\n", name) + for { + select { + case <-canceled: + fmt.Printf("sleepit: %s canceled\n", name) + return + default: + if doSomeWork(deadline) { + fmt.Printf("sleepit: %s done\n", name) // <== NOTE THIS LINE + workerDone <- struct{}{} + return + } + } + } + }() + return workerDone +} + +// Do some work and then return, so that the caller can decide wether to continue or not. +// Return true when all work is done. +func doSomeWork(deadline time.Time) bool { + if time.Now().After(deadline) { + return true + } + timeout := 100 * time.Millisecond + time.Sleep(timeout) + return false +} diff --git a/testdata/ignore_signals/Taskfile.yml b/testdata/ignore_signals/Taskfile.yml new file mode 100644 index 00000000..f7763528 --- /dev/null +++ b/testdata/ignore_signals/Taskfile.yml @@ -0,0 +1,6 @@ +version: '3' + +tasks: + default: + cmds: + - '{{.CLI_ARGS}}' diff --git a/unix_test.go b/unix_test.go new file mode 100644 index 00000000..b89886a9 --- /dev/null +++ b/unix_test.go @@ -0,0 +1,226 @@ +// +build !windows + +// This file contains tests for signal handling on Unix. +// Based on code from https://github.com/marco-m/timeit +// Due to how signals work, for robustness we always spawn a separate process; +// we never send signals to the test process. + +package task_test + +import ( + "bytes" + "errors" + "os/exec" + "path/filepath" + "strings" + "syscall" + "testing" + "time" +) + +var ( + SLEEPIT, _ = filepath.Abs("./tmp/sleepit") +) + +func TestSignalSentToProcessGroup(t *testing.T) { + testCases := map[string]struct { + args []string + sendSigs int + want []string + notWant []string + }{ + // regression: + // - child is terminated, immediately, by "context canceled" (another bug???) + "child does not handle sigint: receives sigint and terminates immediately": { + args: []string{"task", "--", SLEEPIT, "default", "-sleep=10s"}, + sendSigs: 1, + want: []string{ + "sleepit: ready\n", + "sleepit: work started\n", + "task: signal received: interrupt\n", + // 130 = 128 + SIGINT + "task: Failed to run task \"default\": exit status 130\n", + }, + notWant: []string{ + "task: Failed to run task \"default\": context canceled\n", + }, + }, + // 2 regressions: + // - child receives 2 signals instead of 1 + // - child is terminated, immediately, by "context canceled" (another bug???) + // TODO we need -cleanup=2s only to show reliably the bug; once the fix is committed, + // we can use -cleanup=50ms to speed the test up + "child intercepts sigint: receives sigint and does cleanup": { + args: []string{"task", "--", SLEEPIT, "handle", "-sleep=10s", "-cleanup=2s"}, + sendSigs: 1, + want: []string{ + "sleepit: ready\n", + "sleepit: work started\n", + "task: signal received: interrupt\n", + "sleepit: got signal=interrupt count=1\n", + "sleepit: work canceled\n", + "sleepit: cleanup started\n", + "sleepit: cleanup done\n", + "task: Failed to run task \"default\": exit status 3\n", + }, + notWant: []string{ + "sleepit: got signal=interrupt count=2\n", + "task: Failed to run task \"default\": context canceled\n", + }, + }, + // regression: child receives 2 signal instead of 1 and thus terminates abruptly + "child simulates terraform: receives 1 sigint and does cleanup": { + args: []string{"task", "--", SLEEPIT, "handle", "-term-after=2", "-sleep=10s", "-cleanup=50ms"}, + sendSigs: 1, + want: []string{ + "sleepit: ready\n", + "sleepit: work started\n", + "task: signal received: interrupt\n", + "sleepit: got signal=interrupt count=1\n", + "sleepit: work canceled\n", + "sleepit: cleanup started\n", + "sleepit: cleanup done\n", + "task: Failed to run task \"default\": exit status 3\n", + }, + notWant: []string{ + "sleepit: got signal=interrupt count=2\n", + "sleepit: cleanup canceled\n", + "task: Failed to run task \"default\": exit status 4\n", + }, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + var out bytes.Buffer + sut := exec.Command(tc.args[0], tc.args[1:]...) + sut.Stdout = &out + sut.Stderr = &out + sut.Dir = "testdata/ignore_signals" + // Create a new process group by setting the process group ID of the child + // to the child PID. + // By default, the child would inherit the process group of the parent, but + // we want to avoid this, to protect the parent (the test process) from the + // signal that this test will send. More info in the comments below for + // syscall.Kill(). + sut.SysProcAttr = &syscall.SysProcAttr{Setpgid: true, Pgid: 0} + + if err := sut.Start(); err != nil { + t.Fatalf("starting the SUT process: %v", err) + } + + // After the child is started, we want to avoid a race condition where we send + // it a signal before it had time to setup its own signal handlers. Sleeping + // is way too flaky, instead we parse the child output until we get a line + // that we know is printed after the signal handlers are installed... + ready := false + timeout := time.Duration(time.Second) + start := time.Now() + for time.Since(start) < timeout { + if strings.Contains(out.String(), "sleepit: ready\n") { + ready = true + break + } + time.Sleep(10 * time.Millisecond) + } + if !ready { + t.Fatalf("sleepit not ready after %v\n"+ + "additional information:\n"+ + " output:\n%s", + timeout, out.String()) + } + + // When we have a running program in a shell and type CTRL-C, the tty driver + // will send a SIGINT signal to all the processes in the foreground process + // group (see https://en.wikipedia.org/wiki/Process_group). + // + // Here we want to emulate this behavior: send SIGINT to the process group of + // the test executable. Although Go for some reasons doesn't wrap the + // killpg(2) system call, what works is using syscall.Kill(-PID, SIGINT), + // where the negative PID means the corresponding process group. Note that + // this negative PID works only as long as the caller of the kill(2) system + // call has a different PID, which is the case for this test. + for i := 1; i <= tc.sendSigs; i++ { + if err := syscall.Kill(-sut.Process.Pid, syscall.SIGINT); err != nil { + t.Fatalf("sending INT signal to the process group: %v", err) + } + time.Sleep(1 * time.Millisecond) + } + + err := sut.Wait() + + // In case of a subprocess failing, Task always returns exit code 1, not the + // exit code returned by the subprocess. This is understandable, since Task + // supports parallel execution: if two parallel subprocess fail, each with a + // different exit code, which one should Task report? This would be a race. + var wantErr *exec.ExitError + const wantExitStatus = 1 // Task always returns exit code 1 in case of error + if errors.As(err, &wantErr) { + if wantErr.ExitCode() != wantExitStatus { + t.Errorf( + "waiting for child process: got exit status %v; want %d\n"+ + "additional information:\n"+ + " process state: %q", + wantErr.ExitCode(), wantExitStatus, wantErr.String()) + } + } else { + t.Errorf("waiting for child process: got unexpected error type %v (%T); want (%T)", + err, err, wantErr) + } + + gotLines := strings.SplitAfter(out.String(), "\n") + notFound := listDifference(tc.want, gotLines) + if len(notFound) > 0 { + t.Errorf("\nwanted but not found:\n%v", notFound) + } + + found := listIntersection(tc.notWant, gotLines) + if len(found) > 0 { + t.Errorf("\nunwanted but found:\n%v", found) + } + + if len(notFound) > 0 || len(found) > 0 { + t.Errorf("\noutput:\n%v", gotLines) + } + }) + } +} + +// Return the difference of the two lists: the elements that are present in the first +// list, but not in the second one. The notion of presence is not with `=` but with +// string.Contains(l2, l1). +// FIXME this does not enforce ordering. We might want to support both. +func listDifference(lines1, lines2 []string) []string { + difference := []string{} + for _, l1 := range lines1 { + found := false + for _, l2 := range lines2 { + if strings.Contains(l2, l1) { + found = true + break + } + } + if !found { + difference = append(difference, l1) + } + } + + return difference +} + +// Return the intersection of the two lists: the elements that are present in both lists. +// The notion of presence is not with '=' but with string.Contains(l2, l1) +// FIXME this does not enforce ordering. We might want to support both. +func listIntersection(lines1, lines2 []string) []string { + intersection := []string{} + for _, l1 := range lines1 { + for _, l2 := range lines2 { + if strings.Contains(l2, l1) { + intersection = append(intersection, l1) + break + } + } + } + + return intersection +}