Module arvados.retry

Utilities to retry operations.

The core of this module is RetryLoop, a utility class to retry operations that might fail. It can distinguish between temporary and permanent failures; provide exponential backoff; and save a series of results.

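It also provides utility functions for common operations with RetryLoop:

  • check_http_response_success can be used as a RetryLoop success_check for HTTP response codes from the Arvados API server.

  • retry_method can decorate methods to provide a default num_retries keyword argument.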

Expand source code
"""Utilities to retry operations.

The core of this module is `RetryLoop`, a utility class to retry operations
that might fail. It can distinguish between temporary and permanent failures;
provide exponential backoff; and save a series of results.

It also provides utility functions for common operations with `RetryLoop`:

* `check_http_response_success` can be used as a `RetryLoop` `success_check`
  for HTTP response codes from the Arvados API server.
* `retry_method` can decorate methods to provide a default `num_retries`
  keyword argument.
"""
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from builtins import range
from builtins import object
import functools
import inspect
import pycurl
import time

from collections import deque

import arvados.errors

# Status codes that count as success: every code in the 2xx range.
_HTTP_SUCCESSES = set(range(200, 300))
# Status codes classified as temporary failures, i.e. worth retrying.
_HTTP_CAN_RETRY = {408, 409, 422, 423, 500, 502, 503, 504}

class RetryLoop(object):
    """Run a block of code repeatedly until it succeeds or retries run out.

    A `RetryLoop` is its own iterator.  Every iteration yields the number
    of tries that remain, and the loop body reports what happened by
    calling `save_result`.  Iteration stops as soon as a saved result is
    classified as a success or a permanent failure, or when no tries are
    left.  Typical use looks like:

        loop = RetryLoop(num_retries=2)
        for tries_left in loop:
            try:
                result = do_something()
            except TemporaryError as error:
                log("error: {} ({} tries left)".format(error, tries_left))
            else:
                loop.save_result(result)
        if loop.success():
            return loop.last_result()

    Arguments:

    num_retries: int
    : How many times the loop may be retried after the first attempt.
      The loop body therefore runs at most `num_retries + 1` times.

    success_check: Callable
    : Called with every result passed to `save_result`.  It should
      return `True` when the result means success, `False` when it means
      permanent failure, and `None` when it means temporary failure.
      The default accepts every result as a success, so the loop ends
      after the first saved result.

    backoff_start: float
    : The base wait time in seconds.  The wait is multiplied by
      `backoff_growth` at the start of every iteration, including the
      first, so the second iteration begins no sooner than
      `backoff_start * backoff_growth` seconds (capped at `max_wait`)
      after the first one began.  Default 0, which disables all waiting.

    backoff_growth: float
    : The factor applied to the wait time at each iteration.
      Default 2 (i.e., double the wait time each time).

    save_results: int
    : How many of the most recently saved results to keep.  They are
      available through the `results` attribute, oldest first.
      Default 1.

    max_wait: float
    : Upper bound, in seconds, on the wait between iterations.
      Default 60.
    """
    def __init__(self, num_retries, success_check=lambda r: True,
                 backoff_start=0, backoff_growth=2, save_results=1,
                 max_wait=60):
        # Retry budget: the first attempt plus `num_retries` retries.
        self.tries_left = num_retries + 1
        self.check_result = success_check
        # Backoff state; `next_start_time` of 0 means "no wait pending".
        self.backoff_wait = backoff_start
        self.backoff_growth = backoff_growth
        self.max_wait = max_wait
        self.next_start_time = 0
        # Bounded history of saved results, oldest first.
        self.results = deque(maxlen=save_results)
        self._attempts = 0
        # Tri-state flags: None until the loop starts / a verdict is saved.
        self._running = None
        self._success = None

    def __iter__(self):
        """Return this loop, which acts as its own iterator."""
        return self

    def running(self):
        """Report whether the loop is currently running.

        Returns `None` if the loop has never been started, `True` while it
        is running, and `False` once it has stopped, whether because it
        saved a successful result, saved a permanent failure, or ran out
        of retries.
        """
        if not self._running:
            # Either never started (None) or already stopped (False).
            return self._running
        return self._success is None

    def __next__(self):
        """Begin the next loop attempt.

        While the loop is still running, waits out any pending backoff,
        decrements the number of tries left, and returns it.  Otherwise
        raises `StopIteration`.
        """
        if self._running is None:
            self._running = True
        out_of_tries = self.tries_left < 1
        if out_of_tries or not self.running():
            self._running = False
            raise StopIteration

        # Sleep until the start time scheduled by the previous attempt.
        remaining = self.next_start_time - time.time()
        time.sleep(max(0, remaining))

        # Grow the wait for the following attempt, capped at max_wait.
        grown = self.backoff_wait * self.backoff_growth
        self.backoff_wait = self.max_wait if grown > self.max_wait else grown

        self.next_start_time = time.time() + self.backoff_wait
        self.tries_left -= 1
        return self.tries_left

    def save_result(self, result):
        """Record the outcome of the current attempt.

        Stores the result and classifies it with the `success_check`
        function given to `__init__`; a verdict of success or permanent
        failure ends the loop.

        Raises `arvados.errors.AssertionError` if called when the loop is
        not running.

        Arguments:

        result: Any
        : The result from this loop attempt to check and save.
        """
        still_running = self.running()
        if not still_running:
            raise arvados.errors.AssertionError(
                "recorded a loop result after the loop finished")
        self.results.append(result)
        self._success = self.check_result(result)
        self._attempts += 1

    def success(self):
        """Return the verdict on the most recently saved result.

        Returns `True` if the loop recorded a successful result, `False`
        if it recorded a permanent failure, or else `None`.
        """
        return self._success

    def last_result(self):
        """Return the result that was saved most recently.

        Raises `arvados.errors.AssertionError` if no result is available.
        """
        try:
            newest = self.results[-1]
        except IndexError:
            raise arvados.errors.AssertionError(
                "queried loop results before any were recorded")
        return newest

    def attempts(self):
        """Return how many results have been saved.

        Every saved result counts: successes, permanent failures, and
        temporary failures alike.
        """
        return self._attempts

    def attempts_str(self):
        """Return the saved-result count as a human-friendly string.

        The string is '1 attempt' or 'N attempts', where the number is
        the count of saved results.
        """
        count = self._attempts
        return '1 attempt' if count == 1 else '{} attempts'.format(count)


def check_http_response_success(status_code):
    """Convert a numeric HTTP status code to a loop control flag.

    This method takes a numeric HTTP status code and returns `True` if
    the code indicates success, `None` if it indicates temporary
    failure, and `False` otherwise.  You can use this as the
    `success_check` for a `RetryLoop` that queries the Arvados API server.
    Specifically:

    * Any 2xx result returns `True`.

    * A select few status codes, or any malformed responses, return `None`.
      422 Unprocessable Entity is in this category.  This may not meet the
      letter of the HTTP specification, but the Arvados API server will
      use it for various server-side problems like database connection
      errors.  A malformed response is any value outside the 100-599
      range, or any value that cannot be compared with an integer at all
      (for example `None` or a string).

    * Everything else returns `False`.  Note that this includes 1xx and
      3xx status codes.  They don't indicate success, and you can't
      retry those requests verbatim.

    Arguments:

    status_code: int
    : A numeric HTTP response code
    """
    try:
        if status_code in _HTTP_SUCCESSES:
            return True
        if status_code in _HTTP_CAN_RETRY:
            return None
        is_http_status = 100 <= status_code < 600
    except TypeError:
        # Python 3 refuses to order None, strings, etc. against integers,
        # and unhashable values cannot be looked up in a set.  Either way
        # the value is a malformed response, which is documented as a
        # temporary failure rather than an error to raise.
        return None
    if is_http_status:
        return False
    return None  # Get well soon, server.

def retry_method(orig_func):
    """Fill in a method's `num_retries` argument when the caller omits it.

    Use this decorator on instance and class methods that take a
    `num_retries` keyword argument whose default is `None`.  Whenever the
    method is called without `num_retries`, or with `num_retries=None`,
    the wrapper substitutes the `num_retries` attribute of the object the
    method was called on.

    Arguments:

    orig_func: Callable
    : A class or instance method that accepts a `num_retries` keyword argument
    """
    def num_retries_setter(self, *args, **kwargs):
        caller_value = kwargs.get('num_retries')
        if caller_value is None:
            # Only read the attribute when it is actually needed.
            kwargs['num_retries'] = self.num_retries
        return orig_func(self, *args, **kwargs)
    return functools.wraps(orig_func)(num_retries_setter)

Functions

def check_http_response_success(status_code)

Convert a numeric HTTP status code to a loop control flag.

This method takes a numeric HTTP status code and returns True if the code indicates success, None if it indicates temporary failure, and False otherwise. You can use this as the success_check for a RetryLoop that queries the Arvados API server. Specifically:

  • Any 2xx result returns True.

  • A select few status codes, or any malformed responses, return None. 422 Unprocessable Entity is in this category. This may not meet the letter of the HTTP specification, but the Arvados API server will use it for various server-side problems like database connection errors.

  • Everything else returns False. Note that this includes 1xx and 3xx status codes. They don't indicate success, and you can't retry those requests verbatim.

Arguments:

status_code: int
A numeric HTTP response code
Expand source code
def check_http_response_success(status_code):
    """Convert a numeric HTTP status code to a loop control flag.

    This method takes a numeric HTTP status code and returns `True` if
    the code indicates success, `None` if it indicates temporary
    failure, and `False` otherwise.  You can use this as the
    `success_check` for a `RetryLoop` that queries the Arvados API server.
    Specifically:

    * Any 2xx result returns `True`.

    * A select few status codes, or any malformed responses, return `None`.
      422 Unprocessable Entity is in this category.  This may not meet the
      letter of the HTTP specification, but the Arvados API server will
      use it for various server-side problems like database connection
      errors.  A malformed response is any value outside the 100-599
      range, or any value that cannot be compared with an integer at all
      (for example `None` or a string).

    * Everything else returns `False`.  Note that this includes 1xx and
      3xx status codes.  They don't indicate success, and you can't
      retry those requests verbatim.

    Arguments:

    status_code: int
    : A numeric HTTP response code
    """
    try:
        if status_code in _HTTP_SUCCESSES:
            return True
        if status_code in _HTTP_CAN_RETRY:
            return None
        is_http_status = 100 <= status_code < 600
    except TypeError:
        # Python 3 refuses to order None, strings, etc. against integers,
        # and unhashable values cannot be looked up in a set.  Either way
        # the value is a malformed response, which is documented as a
        # temporary failure rather than an error to raise.
        return None
    if is_http_status:
        return False
    return None  # Get well soon, server.
def retry_method(orig_func)

Provide a default value for a method's num_retries argument.

This is a decorator for instance and class methods that accept a num_retries keyword argument, with a None default. When the method is called without a value for num_retries, this decorator will set it from the num_retries attribute of the underlying instance or class.

Arguments:

orig_func: Callable
A class or instance method that accepts a num_retries keyword argument
Expand source code
def retry_method(orig_func):
    """Fill in a method's `num_retries` argument when the caller omits it.

    Use this decorator on instance and class methods that take a
    `num_retries` keyword argument whose default is `None`.  Whenever the
    method is called without `num_retries`, or with `num_retries=None`,
    the wrapper substitutes the `num_retries` attribute of the object the
    method was called on.

    Arguments:

    orig_func: Callable
    : A class or instance method that accepts a `num_retries` keyword argument
    """
    def num_retries_setter(self, *args, **kwargs):
        caller_value = kwargs.get('num_retries')
        if caller_value is None:
            # Only read the attribute when it is actually needed.
            kwargs['num_retries'] = self.num_retries
        return orig_func(self, *args, **kwargs)
    return functools.wraps(orig_func)(num_retries_setter)

Classes

class RetryLoop (num_retries, success_check=<function RetryLoop.<lambda>>, backoff_start=0, backoff_growth=2, save_results=1, max_wait=60)

Coordinate limited retries of code.

RetryLoop coordinates a loop that runs until it records a successful result or tries too many times, whichever comes first. Typical use looks like:

loop = RetryLoop(num_retries=2)
for tries_left in loop:
    try:
        result = do_something()
    except TemporaryError as error:
        log("error: {} ({} tries left)".format(error, tries_left))
    else:
        loop.save_result(result)
if loop.success():
    return loop.last_result()

Arguments:

num_retries: int
The maximum number of times to retry the loop if it doesn't succeed. This means the loop body could run at most num_retries + 1 times.
success_check: Callable
This is a function that will be called each time the loop saves a result. The function should return True if the result indicates the code succeeded, False if it represents a permanent failure, and None if it represents a temporary failure. If no function is provided, the loop will end after any result is saved.
backoff_start: float
The number of seconds that must pass before the loop's second iteration. Default 0, which disables all waiting.
backoff_growth: float
The wait time multiplier after each iteration. Default 2 (i.e., double the wait time each time).
save_results: int
Specify a number to store that many saved results from the loop. These are available through the results attribute, oldest first. Default 1.
max_wait: float
Maximum number of seconds to wait between retries. Default 60.
Expand source code
class RetryLoop(object):
    """Run a block of code repeatedly until it succeeds or retries run out.

    A `RetryLoop` is its own iterator.  Every iteration yields the number
    of tries that remain, and the loop body reports what happened by
    calling `save_result`.  Iteration stops as soon as a saved result is
    classified as a success or a permanent failure, or when no tries are
    left.  Typical use looks like:

        loop = RetryLoop(num_retries=2)
        for tries_left in loop:
            try:
                result = do_something()
            except TemporaryError as error:
                log("error: {} ({} tries left)".format(error, tries_left))
            else:
                loop.save_result(result)
        if loop.success():
            return loop.last_result()

    Arguments:

    num_retries: int
    : How many times the loop may be retried after the first attempt.
      The loop body therefore runs at most `num_retries + 1` times.

    success_check: Callable
    : Called with every result passed to `save_result`.  It should
      return `True` when the result means success, `False` when it means
      permanent failure, and `None` when it means temporary failure.
      The default accepts every result as a success, so the loop ends
      after the first saved result.

    backoff_start: float
    : The base wait time in seconds.  The wait is multiplied by
      `backoff_growth` at the start of every iteration, including the
      first, so the second iteration begins no sooner than
      `backoff_start * backoff_growth` seconds (capped at `max_wait`)
      after the first one began.  Default 0, which disables all waiting.

    backoff_growth: float
    : The factor applied to the wait time at each iteration.
      Default 2 (i.e., double the wait time each time).

    save_results: int
    : How many of the most recently saved results to keep.  They are
      available through the `results` attribute, oldest first.
      Default 1.

    max_wait: float
    : Upper bound, in seconds, on the wait between iterations.
      Default 60.
    """
    def __init__(self, num_retries, success_check=lambda r: True,
                 backoff_start=0, backoff_growth=2, save_results=1,
                 max_wait=60):
        # Retry budget: the first attempt plus `num_retries` retries.
        self.tries_left = num_retries + 1
        self.check_result = success_check
        # Backoff state; `next_start_time` of 0 means "no wait pending".
        self.backoff_wait = backoff_start
        self.backoff_growth = backoff_growth
        self.max_wait = max_wait
        self.next_start_time = 0
        # Bounded history of saved results, oldest first.
        self.results = deque(maxlen=save_results)
        self._attempts = 0
        # Tri-state flags: None until the loop starts / a verdict is saved.
        self._running = None
        self._success = None

    def __iter__(self):
        """Return this loop, which acts as its own iterator."""
        return self

    def running(self):
        """Report whether the loop is currently running.

        Returns `None` if the loop has never been started, `True` while it
        is running, and `False` once it has stopped, whether because it
        saved a successful result, saved a permanent failure, or ran out
        of retries.
        """
        if not self._running:
            # Either never started (None) or already stopped (False).
            return self._running
        return self._success is None

    def __next__(self):
        """Begin the next loop attempt.

        While the loop is still running, waits out any pending backoff,
        decrements the number of tries left, and returns it.  Otherwise
        raises `StopIteration`.
        """
        if self._running is None:
            self._running = True
        out_of_tries = self.tries_left < 1
        if out_of_tries or not self.running():
            self._running = False
            raise StopIteration

        # Sleep until the start time scheduled by the previous attempt.
        remaining = self.next_start_time - time.time()
        time.sleep(max(0, remaining))

        # Grow the wait for the following attempt, capped at max_wait.
        grown = self.backoff_wait * self.backoff_growth
        self.backoff_wait = self.max_wait if grown > self.max_wait else grown

        self.next_start_time = time.time() + self.backoff_wait
        self.tries_left -= 1
        return self.tries_left

    def save_result(self, result):
        """Record the outcome of the current attempt.

        Stores the result and classifies it with the `success_check`
        function given to `__init__`; a verdict of success or permanent
        failure ends the loop.

        Raises `arvados.errors.AssertionError` if called when the loop is
        not running.

        Arguments:

        result: Any
        : The result from this loop attempt to check and save.
        """
        still_running = self.running()
        if not still_running:
            raise arvados.errors.AssertionError(
                "recorded a loop result after the loop finished")
        self.results.append(result)
        self._success = self.check_result(result)
        self._attempts += 1

    def success(self):
        """Return the verdict on the most recently saved result.

        Returns `True` if the loop recorded a successful result, `False`
        if it recorded a permanent failure, or else `None`.
        """
        return self._success

    def last_result(self):
        """Return the result that was saved most recently.

        Raises `arvados.errors.AssertionError` if no result is available.
        """
        try:
            newest = self.results[-1]
        except IndexError:
            raise arvados.errors.AssertionError(
                "queried loop results before any were recorded")
        return newest

    def attempts(self):
        """Return how many results have been saved.

        Every saved result counts: successes, permanent failures, and
        temporary failures alike.
        """
        return self._attempts

    def attempts_str(self):
        """Return the saved-result count as a human-friendly string.

        The string is '1 attempt' or 'N attempts', where the number is
        the count of saved results.
        """
        count = self._attempts
        return '1 attempt' if count == 1 else '{} attempts'.format(count)

Methods

def attempts(self)

Return the number of results that have been saved.

This count includes all kinds of results: success, permanent failure, and temporary failure.

Expand source code
def attempts(self):
    """Return how many results have been saved.

    Every saved result counts: successes, permanent failures, and
    temporary failures alike.
    """
    saved_count = self._attempts
    return saved_count
def attempts_str(self)

Return a human-friendly string counting saved results.

This method returns '1 attempt' or 'N attempts', where the number in the string is the number of saved results.

Expand source code
def attempts_str(self):
    """Return the saved-result count as a human-friendly string.

    The string is '1 attempt' or 'N attempts', where the number is the
    count of saved results.
    """
    count = self._attempts
    return '1 attempt' if count == 1 else '{} attempts'.format(count)
def last_result(self)

Return the most recent result the loop saved.

Raises arvados.errors.AssertionError (not the built-in AssertionError) if called before any result has been saved.

Expand source code
def last_result(self):
    """Return the result that was saved most recently.

    Raises `arvados.errors.AssertionError` if no result is available.
    """
    try:
        newest = self.results[-1]
    except IndexError:
        raise arvados.errors.AssertionError(
            "queried loop results before any were recorded")
    return newest
def running(self)

Return whether this loop is running.

Returns None if the loop has never run, True if it is still running, or False if it has stopped—whether that's because it has saved a successful result, a permanent failure, or has run out of retries.

Expand source code
def running(self):
    """Report whether the loop is currently running.

    Returns `None` if the loop has never been started, `True` while it is
    running, and `False` once it has stopped, whether because it saved a
    successful result, saved a permanent failure, or ran out of retries.
    """
    if not self._running:
        # Either never started (None) or already stopped (False).
        return self._running
    return self._success is None
def save_result(self, result)

Record a loop result.

Save the given result, and end the loop if it indicates success or permanent failure. See documentation for the __init__ success_check argument to learn how that's indicated.

Raises arvados.errors.AssertionError (not the built-in AssertionError) if called after the loop has already ended.

Arguments:

result: Any
The result from this loop attempt to check and save.
Expand source code
def save_result(self, result):
    """Record the outcome of the current attempt.

    Stores the result and classifies it with the `success_check` function
    given to `__init__`; a verdict of success or permanent failure ends
    the loop.

    Raises `arvados.errors.AssertionError` if called when the loop is not
    running.

    Arguments:

    result: Any
    : The result from this loop attempt to check and save.
    """
    still_running = self.running()
    if not still_running:
        raise arvados.errors.AssertionError(
            "recorded a loop result after the loop finished")
    self.results.append(result)
    self._success = self.check_result(result)
    self._attempts += 1
def success(self)

Return the loop's end state.

Returns True if the loop recorded a successful result, False if it recorded permanent failure, or else None.

Expand source code
def success(self):
    """Return the verdict on the most recently saved result.

    Returns `True` if the loop recorded a successful result, `False` if it
    recorded a permanent failure, or else `None`.
    """
    verdict = self._success
    return verdict