Skip to content

Commit

Permalink
Add 'force-batch' deploy flag to allow force run of periodic job.
Browse files Browse the repository at this point in the history
A new 'force-batch' boolean flag has been added to the deploy
command which allows the immediate trigger of a periodic batch job
rather than waiting for the scheduled run. This is helpful when
deploying new code as operators can see the results quickly rather
than having to wait.

Closes hashicorp#107
  • Loading branch information
jrasell committed Feb 13, 2018
1 parent a49e3dd commit 9fd0e2e
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 9 deletions.
23 changes: 22 additions & 1 deletion command/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ General Options:
The time in seconds, after which Levant will auto-promote a canary job
if all canaries within the deployment are healthy.
-force-batch
Forces a new instance of the periodic job. A new instance will be created
even if it violates the job's prohibit_overlap settings.
-force-count
Use the taskgroup count from the Nomad jobfile instead of the count that
is currently set in a running job.
Expand Down Expand Up @@ -72,6 +76,7 @@ func (c *DeployCommand) Run(args []string) int {

flags.StringVar(&config.Addr, "address", "", "")
flags.IntVar(&config.Canary, "canary-auto-promote", 0, "")
flags.BoolVar(&config.ForceBatch, "force-batch", false, "")
flags.BoolVar(&config.ForceCount, "force-count", false, "")
flags.StringVar(&config.LogLevel, "log-level", "INFO", "")
flags.StringVar(&config.VaiableFile, "var-file", "", "")
Expand Down Expand Up @@ -108,8 +113,13 @@ func (c *DeployCommand) Run(args []string) int {
c.UI.Error(fmt.Sprintf("[ERROR] levant/command: %v", err))
return 1
}
}

c.UI.Info(fmt.Sprintf("[INFO] levant/command: running canary-auto-update of %vs", config.Canary))
if config.ForceBatch {
if err = c.checkForceBatch(config.Job, config.ForceBatch); err != nil {
c.UI.Error(fmt.Sprintf("[ERROR] levant/command: %v", err))
return 1
}
}

success := levant.TriggerDeployment(config)
Expand Down Expand Up @@ -137,3 +147,14 @@ func (c *DeployCommand) checkCanaryAutoPromote(job *nomad.Job, canaryAutoPromote

return fmt.Errorf("canary-auto-update of %v passed but job is not canary enabled", canaryAutoPromote)
}

// checkForceBatch validates use of the force-batch flag against the supplied
// job. The flag only makes sense for periodic jobs, so an error is returned
// when forceBatch is true but job.IsPeriodic() reports false. When forceBatch
// is false there is nothing to validate and nil is returned.
func (c *DeployCommand) checkForceBatch(job *nomad.Job, forceBatch bool) error {

	// Without the flag there is no constraint on the job type. Returning the
	// error below in this case would wrongly claim the flag had been passed.
	if !forceBatch {
		return nil
	}

	if job.IsPeriodic() {
		return nil
	}

	return fmt.Errorf("force-batch passed but job is not periodic")
}
31 changes: 31 additions & 0 deletions command/deploy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,34 @@ func TestDeploy_checkCanaryAutoPromote(t *testing.T) {
}
}
}

// TestDeploy_checkForceBatch renders each fixture job file and verifies that
// checkForceBatch returns the expected error for the supplied force-batch
// value.
func TestDeploy_checkForceBatch(t *testing.T) {

	type scenario struct {
		File       string
		ForceBatch bool
		Output     error
	}

	// The flag set against the periodic batch fixture is expected to yield no
	// error.
	scenarios := []scenario{
		{
			File:       "test-fixtures/periodic_batch.nomad",
			ForceBatch: true,
			Output:     nil,
		},
	}

	cmd := new(DeployCommand)
	templateVars := map[string]string{}

	for idx := range scenarios {
		sc := scenarios[idx]

		rendered, renderErr := levant.RenderJob(sc.File, "", &templateVars)
		if renderErr != nil {
			t.Fatalf("case %d failed: %v", idx, renderErr)
		}

		if got := cmd.checkForceBatch(rendered, sc.ForceBatch); got != sc.Output {
			t.Fatalf("case %d: got \"%v\"; want %v", idx, got, sc.Output)
		}
	}
}
25 changes: 25 additions & 0 deletions command/test-fixtures/periodic_batch.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Test fixture: a periodic batch job. It is loaded by path from
# TestDeploy_checkForceBatch in command/deploy_test.go.
job "periodic_batch_test" {
datacenters = ["dc1"]
region = "global"
type = "batch"
priority = 75
# Periodic schedule for the job. prohibit_overlap is enabled here; per the
# -force-batch help text, a forced run is created even if it violates this
# setting.
periodic {
cron = "* 1 * * * *"
prohibit_overlap = true
}
# Single group containing a single docker task.
group "periodic_batch" {
task "periodic_batch" {
driver = "docker"
config {
image = "cogniteev/echo"
}
resources {
cpu = 100
memory = 128
network {
mbits = 1
}
}
}
}
}
39 changes: 31 additions & 8 deletions levant/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,19 @@ func (l *levantDeployment) deploy() (success bool) {
return
}

if l.config.ForceBatch {
if eval.EvalID, err = l.triggerPeriodic(l.config.Job.ID); err != nil {
logging.Error("levant/deploy: unable to trigger periodic instance of job %s: %v",
*l.config.Job.Name, err)
return
}
}

// Periodic and parameterized jobs do not return an evaluation and therefore
// can't perform the evaluationInspector.
if !l.config.Job.IsPeriodic() && !l.config.Job.IsParameterized() {
// can't perform the evaluationInspector unless we are forcing an instance of
// periodic which will yield an EvalID.
if !l.config.Job.IsPeriodic() && !l.config.Job.IsParameterized() ||
l.config.Job.IsPeriodic() && l.config.ForceBatch {

// Trigger the evaluationInspector to identify any potential errors in the
// Nomad evaluation run. As far as I can tell from testing; a single alloc
Expand Down Expand Up @@ -378,6 +388,19 @@ func (l *levantDeployment) checkCanaryDeploymentHealth(depID string) (healthy bo
return
}

// triggerPeriodic forces a run of the periodic job identified by jobID
// outside of its planned schedule. It returns the evaluation ID reported by
// the PeriodicForce call so the forced run can be checked in the same way as
// other jobs.
func (l *levantDeployment) triggerPeriodic(jobID *string) (evalID string, err error) {

	id := *jobID
	logging.Info("levant/deploy: triggering a run of periodic job %s", id)

	// The error is handed straight back without inspection; checking it is
	// the caller's responsibility. The second return value of PeriodicForce
	// is not needed here and is discarded.
	evalID, _, err = l.nomad.Jobs().PeriodicForce(id, nil)
	return evalID, err
}

// getDeploymentID finds the Nomad deploymentID associated to a Nomad
// evaluationID. This is only needed as sometimes Nomad initially returns eval
// info with an empty deploymentID; and a retry is required in order to get the
Expand Down Expand Up @@ -411,12 +434,6 @@ func (l *levantDeployment) dynamicGroupCountUpdater() error {
// Nomad cluster.
rJob, _, err := l.nomad.Jobs().Info(*l.config.Job.Name, &nomad.QueryOptions{})

// Check that the job is actually running and not in a potentially stopped
// state.
if *rJob.Status != nomadStructs.JobStatusRunning {
return nil
}

// This is a hack due to GH-1849; we check the error string for 404 which
// indicates the job is not running, not that there was an error in the API
// call.
Expand All @@ -428,6 +445,12 @@ func (l *levantDeployment) dynamicGroupCountUpdater() error {
return err
}

// Check that the job is actually running and not in a potentially stopped
// state.
if *rJob.Status != nomadStructs.JobStatusRunning {
return nil
}

logging.Debug("levant/deploy: running dynamic job count updater for job %s", *l.config.Job.Name)

// Iterate the templated job and the Nomad returned job and update group count
Expand Down
4 changes: 4 additions & 0 deletions levant/structs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ type Config struct {
// until attempting to perform autopromote.
Canary int

// ForceBatch is a boolean flag that can be used to force a run of a periodic
// job upon registration.
ForceBatch bool

// ForceCount is a boolean flag that can be used to ignore running job counts
// and force the count based on the rendered job file.
ForceCount bool
Expand Down

0 comments on commit 9fd0e2e

Please sign in to comment.