|
| 1 | +# Rate limit exceeded [#891](https://github.com/Flank/flank/issues/891) |
| 2 | +Reported description: |
| 3 | +> After bump from 20.05.2 to 20.06.2 I started to see some issues related to rate limit. Also, there are no significant changes on the shard size or test amount. |
| 4 | +
|
| 5 | +> We are running another 7 flank execution in parallel. Total ~6k tests. |
| 6 | +
|
| 7 | +## Stack trace |
| 8 | +``` |
| 9 | +java.io.IOException: Request failed |
| 10 | + at ftl.http.ExecuteWithRetryKt$executeWithRetry$$inlined$withRetry$1.invokeSuspend(ExecuteWithRetry.kt:36) |
| 11 | + at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:33) |
| 12 | + at kotlinx.coroutines.DispatchedTask.run(Dispatched.kt:241) |
| 13 | + at kotlinx.coroutines.EventLoopImplBase.processNextEvent(EventLoop.common.kt:270) |
| 14 | + at kotlinx.coroutines.BlockingCoroutine.joinBlocking(Builders.kt:79) |
| 15 | + at kotlinx.coroutines.BuildersKt__BuildersKt.runBlocking(Builders.kt:54) |
| 16 | + at kotlinx.coroutines.BuildersKt.runBlocking(Unknown Source) |
| 17 | + at kotlinx.coroutines.BuildersKt__BuildersKt.runBlocking$default(Builders.kt:36) |
| 18 | + at kotlinx.coroutines.BuildersKt.runBlocking$default(Unknown Source) |
| 19 | + at ftl.http.ExecuteWithRetryKt.executeWithRetry(ExecuteWithRetry.kt:41) |
| 20 | + at ftl.gc.GcToolResults.getStepResult(GcToolResults.kt:98) |
| 21 | + at ftl.json.SavedMatrix.finished(SavedMatrix.kt:90) |
| 22 | + at ftl.json.SavedMatrix.update(SavedMatrix.kt:65) |
| 23 | + at ftl.json.MatrixMapKt.update(MatrixMap.kt:47) |
| 24 | + at ftl.run.NewTestRunKt$newTestRun$2$2.invokeSuspend(NewTestRun.kt:24) |
| 25 | + at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:33) |
| 26 | + at kotlinx.coroutines.ResumeModeKt.resumeUninterceptedMode(ResumeMode.kt:45) |
| 27 | + at kotlinx.coroutines.internal.ScopeCoroutine.afterCompletionInternal(Scopes.kt:32) |
| 28 | + at kotlinx.coroutines.JobSupport.completeStateFinalization(JobSupport.kt:310) |
| 29 | + at kotlinx.coroutines.JobSupport.tryFinalizeSimpleState(JobSupport.kt:276) |
| 30 | + at kotlinx.coroutines.JobSupport.tryMakeCompleting(JobSupport.kt:807) |
| 31 | + at kotlinx.coroutines.JobSupport.makeCompletingOnce$kotlinx_coroutines_core(JobSupport.kt:787) |
| 32 | + at kotlinx.coroutines.AbstractCoroutine.resumeWith(AbstractCoroutine.kt:111) |
| 33 | + at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:46) |
| 34 | + at kotlinx.coroutines.ResumeModeKt.resumeUninterceptedMode(ResumeMode.kt:45) |
| 35 | + at kotlinx.coroutines.internal.ScopeCoroutine.afterCompletionInternal(Scopes.kt:32) |
| 36 | + at kotlinx.coroutines.JobSupport.completeStateFinalization(JobSupport.kt:310) |
| 37 | + at kotlinx.coroutines.JobSupport.tryFinalizeFinishingState(JobSupport.kt:236) |
| 38 | + at kotlinx.coroutines.JobSupport.tryMakeCompletingSlowPath(JobSupport.kt:849) |
| 39 | + at kotlinx.coroutines.JobSupport.tryMakeCompleting(JobSupport.kt:811) |
| 40 | + at kotlinx.coroutines.JobSupport.makeCompletingOnce$kotlinx_coroutines_core(JobSupport.kt:787) |
| 41 | + at kotlinx.coroutines.AbstractCoroutine.resumeWith(AbstractCoroutine.kt:111) |
| 42 | + at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:46) |
| 43 | + at kotlinx.coroutines.DispatchedTask.run(Dispatched.kt:241) |
| 44 | + at kotlinx.coroutines.EventLoopImplBase.processNextEvent(EventLoop.common.kt:270) |
| 45 | + at kotlinx.coroutines.BlockingCoroutine.joinBlocking(Builders.kt:79) |
| 46 | + at kotlinx.coroutines.BuildersKt__BuildersKt.runBlocking(Builders.kt:54) |
| 47 | + at kotlinx.coroutines.BuildersKt.runBlocking(Unknown Source) |
| 48 | + at kotlinx.coroutines.BuildersKt__BuildersKt.runBlocking$default(Builders.kt:36) |
| 49 | + at kotlinx.coroutines.BuildersKt.runBlocking$default(Unknown Source) |
| 50 | + at ftl.cli.firebase.test.android.AndroidRunCommand.run(AndroidRunCommand.kt:48) |
| 51 | + at picocli.CommandLine.executeUserObject(CommandLine.java:1769) |
| 52 | + at picocli.CommandLine.access$900(CommandLine.java:145) |
| 53 | + at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2150) |
| 54 | + at picocli.CommandLine$RunLast.handle(CommandLine.java:2144) |
| 55 | + at picocli.CommandLine$RunLast.handle(CommandLine.java:2108) |
| 56 | + at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:1975) |
| 57 | + at picocli.CommandLine.execute(CommandLine.java:1904) |
| 58 | + at ftl.Main$Companion$main$1.invoke(Main.kt:53) |
| 59 | + at ftl.Main$Companion$main$1.invoke(Main.kt:43) |
| 60 | + at ftl.util.Utils.withGlobalExceptionHandling(Utils.kt:130) |
| 61 | + at ftl.Main$Companion.main(Main.kt:49) |
| 62 | + at ftl.Main.main(Main.kt) |
| 63 | +C used by: com.google.api.client.googleapis.json.GoogleJsonResponseException: 429 Too Many Requests |
| 64 | +{ |
| 65 | + "code" : 429, |
| 66 | + "errors" : [ { |
| 67 | + "domain" : "global", |
| 68 | + "message" : "Quota exceeded for quota group 'default' and limit 'Queries per user per 100 seconds' of service 'toolresults.googleapis.com' for consumer 'project_number:x'.", |
| 69 | + "reason" : "rateLimitExceeded" |
| 70 | + } ], |
| 71 | + "message" : "Quota exceeded for quota group 'default' and limit 'Queries per user per 100 seconds' of service 'toolresults.googleapis.com' for consumer 'project_number:x'.", |
| 72 | + "status" : "RESOURCE_EXHAUSTED" |
| 73 | +} |
| 74 | + at com.google.api.client.googleapis.json.GoogleJsonResponseException.from(GoogleJsonResponseException.java:150) |
| 75 | + at com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:113) |
| 76 | + at com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:40) |
| 77 | + at com.google.api.client.googleapis.services.AbstractGoogleClientRequest$1.interceptResponse(AbstractGoogleClientRequest.java:443) |
| 78 | + at com.google.api.client.http.HttpRequest.execute(HttpRequest.java:1092) |
| 79 | + at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:541) |
| 80 | + at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:474) |
| 81 | + at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.execute(AbstractGoogleClientRequest.java:591) |
| 82 | + at ftl.http.ExecuteWithRetryKt$executeWithRetry$$inlined$withRetry$1.invokeSuspend(ExecuteWithRetry.kt:41) |
| 83 | + ... 52 more |
| 84 | +``` |
| 85 | + |
| 86 | +## Reported flank config |
| 87 | +```yaml |
| 88 | + flank: |
| 89 | + max-test-shards: 15 |
| 90 | + shard-time: 160 |
| 91 | + num-test-runs: 1 |
| 92 | + smart-flank-gcs-path: gs://x/unit-tests.xml |
| 93 | + smart-flank-disable-upload: false |
| 94 | + files-to-download: |
| 95 | + test-targets-always-run: |
| 96 | + disable-sharding: false |
| 97 | + project: x |
| 98 | + local-result-dir: results |
| 99 | + full-junit-result: true |
| 100 | + # Android Flank Yml |
| 101 | + keep-file-path: false |
| 102 | + additional-app-test-apks: |
| 103 | + run-timeout: -1 |
| 104 | + legacy-junit-result: false |
| 105 | + ignore-failed-tests: false |
| 106 | + output-style: multi |
| 107 | + |
| 108 | +RunTests |
| 109 | + |
| 110 | + Smart Flank cache hit: 100% (88 / 88) |
| 111 | + Shard times: 93s, 93s |
| 112 | + |
| 113 | + Uploadingx-app-debug.apk . |
| 114 | + Uploading x-androidTest.apk . |
| 115 | + 88 tests / 2 shards |
| 116 | +``` |
| 117 | + |
| 118 | +## To Reproduce |
| 119 | +```shell script |
| 120 | +Flank.jar firebase test android run --num-flaky-test-attempts=0 --full-junit-result=true |
| 121 | +``` |
| 122 | + |
| 123 | +## API usage outline |
| 124 | +Real examples simplified to pseudo code that outlines only API call usage |
| 125 | +#### Gcloud |
| 126 | +Case for disabled sharding |
| 127 | +* [MonitorTestExecutionProgress](https://github.com/Flank/gcloud_cli/blob/137d864acd5928baf25434cf59b0225c4d1f9319/google-cloud-sdk/lib/googlecloudsdk/api_lib/firebase/test/matrix_ops.py#L164) |
| 128 | +``` |
| 129 | +while (matrix status is not completed) { |
| 130 | + _client.projects_testMatrices.Get( |
| 131 | + TestingProjectsTestMatricesGetRequest(projectId, testMatrixId) |
| 132 | + ) |
| 133 | + wait(6s) |
| 134 | +} |
| 135 | +``` |
| 136 | +* [MonitorTestMatrixProgress](https://github.com/Flank/gcloud_cli/blob/137d864acd5928baf25434cf59b0225c4d1f9319/google-cloud-sdk/lib/googlecloudsdk/api_lib/firebase/test/matrix_ops.py#L248) |
| 137 | +Case for enabled sharding |
| 138 | +``` |
| 139 | +while (matrix status is not completed) { |
| 140 | + _client.projects_testMatrices.Get( |
| 141 | + TestingProjectsTestMatricesGetRequest(projectId, testMatrixId) |
| 142 | + ) |
| 143 | + wait(6s) |
| 144 | +} |
| 145 | +``` |
| 146 | + |
| 147 | +#### Flank v20.06.2 |
| 148 | +* [pollMatrices](https://github.com/Flank/flank/blob/6ee6939263923953edf67afa7218cf2c496c2ef2/test_runner/src/main/kotlin/ftl/run/common/PollMatrices.kt#L19) |
| 149 | +``` |
| 150 | +forEach(test matrix) { |
| 151 | + async while(matrix status is not completed) { |
| 152 | + GcTestMatrix.refresh(testMatrixId, projectId) |
| 153 | + wait(5s) |
| 154 | + } |
| 155 | +} |
| 156 | +``` |
| 157 | +* [SavedMatrix.finished](https://github.com/Flank/flank/blob/c88cb2786de67c0a114fc31a7b25917a035e145b/test_runner/src/main/kotlin/ftl/json/SavedMatrix.kt#L75) |
| 158 | +``` |
| 159 | +forEach(test matrix) { |
| 160 | + sync forEach(matrix test execution) { |
| 161 | + GcToolResults.listTestCases(toolResultsStep) |
| 162 | + GcToolResults.getStepResult(toolResultsStep) |
| 163 | + } |
| 164 | + sync forEach(matrix test execution) { |
| 165 | + GcToolResults.getExecutionResult(testExecution) |
| 166 | + GcToolResults.getStepResult(toolResultsStep) |
| 167 | + } |
| 168 | +} |
| 169 | +``` |
| 170 | + |
| 171 | +#### Flank v20.05.2 |
| 172 | +* [pollMatrices](https://github.com/Flank/flank/blob/accca3b941874e9556eea6616b34a9f4319c8746/test_runner/src/main/kotlin/ftl/run/common/PollMatrices.kt#L15) |
| 173 | +``` |
| 174 | +// Only the first matrix is getting status updates |
| 175 | +// the others are blocked until the first is getting completed. |
| 176 | +// As a result, it reduces the amount of requests to 1 per 5 secs. |
| 177 | +// That is why this version of the flank is not getting limit exceeded. |
| 178 | +forEach(test matrix) { |
| 179 | + sync while(matrix status is not completed) { |
| 180 | + GcTestMatrix.refresh(testMatrixId, projectId) |
| 181 | + wait(5s) |
| 182 | + } |
| 183 | +} |
| 184 | +``` |
| 185 | + |
| 186 | +## API calls usage comparision table |
| 187 | +Following table should compare API calls complexity. |
| 188 | + |
| 189 | +| | execution status updates | |
| 190 | +|:-------:|:------------------------:| |
| 191 | +| Gcloud | 1 * r / 6s + 1 | |
| 192 | +| 20.05.2 | 1 * r / 5s + (E * r) | |
| 193 | +| 20.06.2 | M * r / 5s + (M * E * 4 * r) | |
| 194 | + |
| 195 | +``` |
| 196 | +r - request |
| 197 | +s - second |
| 198 | +M - count of matrixes |
| 199 | +E - count of test executions in matrix scope |
| 200 | +``` |
| 201 | + |
| 202 | +## Conclusions |
| 203 | +#### Gclud |
| 204 | +Because of single matrix run gives only 1 request per 6 seconds |
| 205 | + |
| 206 | +#### Flank v20.05.2 |
| 207 | +Gets matrix status updates in synchronize way so only the first matrix is getting status updates, |
| 208 | +the others are blocked until the first is getting completed, so the amount of requests is reduced to 1 per 5 secs. |
| 209 | +Plus additional number of requests for each execution in one matrix scope. |
| 210 | +That is why this version of the flank is not getting a limit exceeded. |
| 211 | + |
| 212 | +#### Flank v20.06.2 |
| 213 | +Is polling results asynchronously for each matrix. |
| 214 | +At last, it is doing a request for each test execution for each matrix, |
| 215 | +and this is the place where flank is getting a rate limit exceeded. |
| 216 | +It is visible on stack trace. |
| 217 | +``` |
| 218 | + at ftl.gc.GcToolResults.getStepResult(GcToolResults.kt:98) |
| 219 | + at ftl.json.SavedMatrix.finished(SavedMatrix.kt:90) |
| 220 | +``` |
0 commit comments