[TestbedV2][master] Remove timeout in each step. (#12915)

Previously, we set a timeout in each step, such as Lock testbed, Prepare testbed, Run test and KVM dump. When an issue such as a retry happens in one step, it causes a timeout error, even though the step only needs more time to succeed. In this PR, we remove the timeout limit in each step and control the timeout at the job level instead. When a job runs for more than four hours, it will be cancelled. Why I did it: Previously, we set a timeout in each step, such as Lock testbed, Prepare testbed, Run test and KVM dump. When an issue such as a retry happens in one step, it causes a timeout error, even though the step only needs more time to succeed. In this PR, we remove the timeout limit in each step and control the timeout at the job level instead. When a job runs for more than four hours, it will be cancelled. How I did it: Remove the timeout parameter in each step, and control the timeout at the job level. How to verify it: Set the timeout of one job to 4 hours; when the timeout is reached, Azure Pipelines will cancel the job.
2022-12-04 14:30:03 +08:00 · 2022-12-04 14:30:03 +08:00 · cb354a5af2
commit cb354a5af2
parent 8bf7a8b2ce
2 changed files with 10 additions and 14 deletions
--- a/.azure-pipelines/run-test-scheduler-template.yml
+++ b/.azure-pipelines/run-test-scheduler-template.yml
@@ -81,11 +81,10 @@ steps:
      echo "TestbedV2 is just online and might not be stable enough, for any issue, please send email to sonictestbedtools@microsoft.com"
      echo "Runtime detailed progress at https://www.testbed-tools.org/scheduler/testplan/$TEST_PLAN_ID"
      # When "LOCK_TESTBED" finish, it changes into "PREPARE_TESTBED"
-      python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --timeout 43200 --expected-states PREPARE_TESTBED EXECUTING KVMDUMP FINISHED CANCELLED FAILED
+      python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-states PREPARE_TESTBED EXECUTING KVMDUMP FINISHED CANCELLED FAILED
    env:
      TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL)
    displayName: Lock testbed
    timeoutInMinutes: 240
  - script: |
      set -ex
@@ -95,11 +94,10 @@ steps:
      echo "TestbedV2 is just online and might not be stable enough, for any issue, please send email to sonictestbedtools@microsoft.com"
      echo "Runtime detailed progress at https://www.testbed-tools.org/scheduler/testplan/$TEST_PLAN_ID"
      # When "PREPARE_TESTBED" finish, it changes into "EXECUTING"
-      python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --timeout 2400 --expected-states EXECUTING KVMDUMP FINISHED CANCELLED FAILED
+      python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-states EXECUTING KVMDUMP FINISHED CANCELLED FAILED
    env:
      TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL)
    displayName: Prepare testbed
    timeoutInMinutes: 40
  - script: |
      set -ex
@@ -107,11 +105,10 @@ steps:
      echo "TestbedV2 is just online and might not be stable enough, for any issue, please send email to sonictestbedtools@microsoft.com"
      echo "Runtime detailed progress at https://www.testbed-tools.org/scheduler/testplan/$TEST_PLAN_ID"
      # When "EXECUTING" finish, it changes into "KVMDUMP", "FAILED", "CANCELLED" or "FINISHED"
-      python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --timeout 18000 --expected-states KVMDUMP FINISHED CANCELLED FAILED
+      python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-states KVMDUMP FINISHED CANCELLED FAILED
    env:
      TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL)
    displayName: Run test
    timeoutInMinutes: 300
  - script: |
      set -ex
@@ -119,12 +116,11 @@ steps:
      echo "TestbedV2 is just online and might not be stable enough, for any issue, please send email to sonictestbedtools@microsoft.com"
      echo "Runtime detailed progress at https://www.testbed-tools.org/scheduler/testplan/$TEST_PLAN_ID"
      # When "KVMDUMP" finish, it changes into "FAILED", "CANCELLED" or "FINISHED"
-      python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --timeout 43200 --expected-states FINISHED CANCELLED FAILED
+      python ./.azure-pipelines/test_plan.py poll -i "$(TEST_PLAN_ID)" --expected-states FINISHED CANCELLED FAILED
    condition: succeededOrFailed()
    env:
      TESTBED_TOOLS_URL: $(TESTBED_TOOLS_URL)
    displayName: KVM dump
    timeoutInMinutes: 20
  - script: |
      set -ex
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -177,7 +177,7 @@ stages:
    pool:
      vmImage: 'ubuntu-20.04'
    displayName: "kvmtest-t0 by TestbedV2"
-    timeoutInMinutes: 1080
+    timeoutInMinutes: 240
    condition: and(succeeded(), eq(variables.BUILD_IMG_RUN_TESTBEDV2_TEST, 'YES'))
    continueOnError: false
    steps:
@@ -191,7 +191,7 @@ stages:
    pool:
      vmImage: 'ubuntu-20.04'
    displayName: "kvmtest-t0-2vlans by TestbedV2"
-    timeoutInMinutes: 1080
+    timeoutInMinutes: 240
    condition: and(succeeded(), eq(variables.BUILD_IMG_RUN_TESTBEDV2_TEST, 'YES'))
    continueOnError: false
    steps:
@@ -255,7 +255,7 @@ stages:
    pool:
      vmImage: 'ubuntu-20.04'
    displayName: "kvmtest-t1-lag by TestbedV2"
-    timeoutInMinutes: 600
+    timeoutInMinutes: 240
    condition: and(succeeded(), eq(variables.BUILD_IMG_RUN_TESTBEDV2_TEST, 'YES'))
    continueOnError: false
    steps:
@@ -321,7 +321,7 @@ stages:
    displayName: "kvmtest-multi-asic-t1-lag by TestbedV2"
    pool:
      vmImage: 'ubuntu-20.04'
-    timeoutInMinutes: 1080
+    timeoutInMinutes: 240
    condition: and(succeeded(), eq(variables.BUILD_IMG_RUN_TESTBEDV2_TEST, 'YES'))
    continueOnError: true
    steps:
@@ -337,7 +337,7 @@ stages:
    pool:
      vmImage: 'ubuntu-20.04'
    displayName: "kvmtest-dualtor-t0 by TestbedV2"
-    timeoutInMinutes: 1080
+    timeoutInMinutes: 240
    condition: and(succeeded(), eq(variables.BUILD_IMG_RUN_TESTBEDV2_TEST, 'YES'))
    continueOnError: false
    steps:
@@ -352,7 +352,7 @@ stages:
    displayName: "kvmtest-t0-sonic by TestbedV2"
    pool:
      vmImage: 'ubuntu-20.04'
-    timeoutInMinutes: 1080
+    timeoutInMinutes: 240
    condition: and(succeeded(), eq(variables.BUILD_IMG_RUN_TESTBEDV2_TEST, 'YES'))
    continueOnError: true
    steps: