arvados.util

Arvados utilities

This module provides functions and constants that are useful across a variety of Arvados resource types, or extend the Arvados API client (see arvados.api).

  1# Copyright (C) The Arvados Authors. All rights reserved.
  2#
  3# SPDX-License-Identifier: Apache-2.0
  4"""Arvados utilities
  5
  6This module provides functions and constants that are useful across a variety
  7of Arvados resource types, or extend the Arvados API client (see `arvados.api`).
  8"""
  9
 10import errno
 11import fcntl
 12import hashlib
 13import httplib2
 14import operator
 15import os
 16import random
 17import re
 18import subprocess
 19import sys
 20
 21import arvados.errors
 22
 23from typing import (
 24    Any,
 25    Callable,
 26    Container,
 27    Dict,
 28    Iterator,
 29    List,
 30    TypeVar,
 31    Union,
 32)
 33
 34T = TypeVar('T')
 35
# NOTE: `$` also matches just before a single trailing newline, so
# `HEX_RE.match('ab\n')` succeeds. Use `HEX_RE.fullmatch()` when the whole
# string must consist of hexadecimal digits.
HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
"""Regular expression to match a hexadecimal string (case-insensitive)"""
CR_UNCOMMITTED = 'Uncommitted'
"""Constant `state` value for uncommitted container requests"""
CR_COMMITTED = 'Committed'
"""Constant `state` value for committed container requests"""
CR_FINAL = 'Final'
"""Constant `state` value for finalized container requests"""

# None of the patterns below carry `^`/`$` anchors (except the per-line `$`
# in `manifest_pattern`): `pattern.match()` only pins the start of the
# string, `pattern.search()` finds the first occurrence anywhere, and
# `pattern.fullmatch()` is needed to validate a whole string.
keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*')
"""Regular expression to match any Keep block locator"""
signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*\+A\S+(\+\S+)*')
"""Regular expression to match any Keep block locator with an access token hint"""
portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+')
"""Regular expression to match any collection portable data hash"""
manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+[0-9]+)(\+\S+)*)+( +[0-9]+:[0-9]+:\S+)+$)+', flags=re.MULTILINE)
"""Regular expression to match an Arvados collection manifest text"""
keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+[0-9]+)/(.*)')
"""Regular expression to match a file path from a collection identified by portable data hash"""
keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+[0-9]+)/(.*)')
"""Regular expression to match a `keep:` URI with a collection identified by portable data hash"""

# Every UUID pattern has the shape `<5 chars>-<5 chars>-<15 chars>`; the
# type-specific patterns differ only in the fixed middle segment.
uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
"""Regular expression to match any Arvados object UUID"""
collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
"""Regular expression to match any Arvados collection UUID"""
container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
"""Regular expression to match any Arvados container UUID"""
group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
"""Regular expression to match any Arvados group UUID"""
link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
"""Regular expression to match any Arvados link UUID"""
user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
"""Regular expression to match any Arvados user UUID"""
 70
 71def is_hex(s: str, *length_args: int) -> bool:
 72    """Indicate whether a string is a hexadecimal number
 73
 74    This method returns true if all characters in the string are hexadecimal
 75    digits. It is case-insensitive.
 76
 77    You can also pass optional length arguments to check that the string has
 78    the expected number of digits. If you pass one integer, the string must
 79    have that length exactly, otherwise the method returns False. If you
 80    pass two integers, the string's length must fall within that minimum and
 81    maximum (inclusive), otherwise the method returns False.
 82
 83    Arguments:
 84
 85    * s: str --- The string to check
 86
 87    * length_args: int --- Optional length limit(s) for the string to check
 88    """
 89    num_length_args = len(length_args)
 90    if num_length_args > 2:
 91        raise arvados.errors.ArgumentError(
 92            "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
 93    elif num_length_args == 2:
 94        good_len = (length_args[0] <= len(s) <= length_args[1])
 95    elif num_length_args == 1:
 96        good_len = (len(s) == length_args[0])
 97    else:
 98        good_len = True
 99    return bool(good_len and HEX_RE.match(s))
100
def keyset_list_all(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str="created_at",
        num_retries: int=0,
        ascending: bool=True,
        key_fields: Container[str]=('uuid',),
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all Arvados resources from an API list call

    This method takes a method that represents an Arvados API list call, and
    iterates the objects returned by the API server. It can make multiple API
    calls to retrieve and iterate all objects available from the API server.
    Each object is yielded once, in the order the server returns it.

    This is a generator: no API call is made, and no argument is validated,
    until iteration starts. `arvados.errors.ArgumentError` is raised at that
    point if `key_fields` has more than one entry that is not `order_key`.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] --- A
      function that wraps an Arvados API method that returns a list of
      objects. If you have an Arvados API client named `arv`, examples
      include `arv.collections().list` and `arv.groups().contents`. Note
      that you should pass the function *without* calling it.

    * order_key: str --- The name of the primary object field that objects
      should be sorted by. This name is used to build an `order` argument
      for `fn`. Default `'created_at'`.

    * num_retries: int --- This argument is passed through to
      `arvados.api_resources.ArvadosAPIRequest.execute` for each API call. See
      that method's docstring for details. Default 0 (meaning API calls will
      use the `num_retries` value set when the Arvados API client was
      constructed).

    * ascending: bool --- Used to build an `order` argument for `fn`. If True,
      all fields will be sorted in `'asc'` (ascending) order. Otherwise, all
      fields will be sorted in `'desc'` (descending) order.

    * key_fields: Container[str] --- One or two fields that constitute
      a unique key for returned items.  Normally this should be the
      default value `('uuid',)`, unless `fn` returns
      computed_permissions records, in which case it should be
      `('user_uuid', 'target_uuid')`.  If two fields are given, one of
      them must be equal to `order_key`.

    Additional keyword arguments will be passed directly to `fn` for each API
    call. Note that this function sets `count`, `limit`, and `order` as part of
    its work. A `filters` argument is combined with the paging filters, and a
    `select` argument is extended with the fields needed for paging.

    """
    tiebreak_keys = set(key_fields) - {order_key}
    if len(tiebreak_keys) == 0:
        tiebreak_key = 'uuid'
    elif len(tiebreak_keys) == 1:
        tiebreak_key = tiebreak_keys.pop()
    else:
        raise arvados.errors.ArgumentError(
            "key_fields can have at most one entry that is not order_key")

    pagesize = 1000
    kwargs["limit"] = pagesize
    kwargs["count"] = 'none'
    asc = "asc" if ascending else "desc"
    kwargs["order"] = [f"{order_key} {asc}", f"{tiebreak_key} {asc}"]
    # Copy into a list so any sequence of filters can be concatenated with
    # the paging filters below.
    other_filters = list(kwargs.get("filters") or [])

    if 'select' in kwargs:
        # Paging reads order_key, tiebreak_key and key_fields from every
        # item, so they must all be selected. tiebreak_key is listed
        # explicitly because it defaults to 'uuid' even when 'uuid' is not
        # one of key_fields.
        kwargs['select'] = list({*kwargs['select'], *key_fields, order_key, tiebreak_key})

    nextpage = []
    key_getter = operator.itemgetter(*key_fields)
    seen_prevpage = set()
    seen_thispage = set()
    lastitem = None
    prev_page_all_same_order_key = False

    while True:
        kwargs["filters"] = nextpage+other_filters
        page = fn(**kwargs).execute(num_retries=num_retries)["items"]

        if len(page) == 0:
            if prev_page_all_same_order_key:
                # The run of items sharing lastitem's order key is
                # exhausted; move on to strictly later order keys.
                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
                prev_page_all_same_order_key = False
                continue
            else:
                return

        seen_prevpage = seen_thispage
        seen_thispage = set()

        for i in page:
            # In cases where there's more than one record with the
            # same order key, the result could include records we
            # already saw in the last page.  Skip them.
            seen_key = key_getter(i)
            if seen_key in seen_prevpage:
                continue
            seen_thispage.add(seen_key)
            yield i

        firstitem = page[0]
        # Only advance the paging anchor when the page ended on a new item.
        # If the final item was already yielded from the previous page, this
        # page holds nothing but repeats, all of which sort at or before the
        # previous anchor. Anchoring on such a repeat would make the next
        # request fetch the previous anchor again, after it has dropped out
        # of the "seen" set, and yield it a second time.
        if key_getter(page[-1]) not in seen_prevpage:
            lastitem = page[-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using tiebreak key for paging.
            nextpage = [[order_key, "=", lastitem[order_key]], [tiebreak_key, ">" if ascending else "<", lastitem[tiebreak_key]]]
            prev_page_all_same_order_key = True
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  If
            # there are multiple rows with the same order key it is
            # still likely we'll end up retrieving duplicate rows.
            # That's handled by tracking the "seen" rows for each page
            # so they can be skipped if they show up on the next page.
            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]]]
            if tiebreak_key == "uuid":
                nextpage += [[tiebreak_key, "!=", lastitem[tiebreak_key]]]
            prev_page_all_same_order_key = False
221
def iter_computed_permissions(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str='user_uuid',
        num_retries: int=0,
        ascending: bool=True,
        key_fields: Container[str]=('user_uuid', 'target_uuid'),
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all `computed_permission` resources

    This is a thin wrapper around `keyset_list_all`: every argument is
    forwarded unchanged, and only the defaults differ so that they suit the
    computed_permissions API.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] ---
      see `keyset_list_all`.  Typically this is an instance of
      `arvados.api_resources.ComputedPermissions.list`.  Given an
      Arvados API client named `arv`, typical usage is
      `iter_computed_permissions(arv.computed_permissions().list)`.

    * order_key: str --- see `keyset_list_all`.  Default
      `'user_uuid'`.

    * num_retries: int --- see `keyset_list_all`.

    * ascending: bool --- see `keyset_list_all`.

    * key_fields: Container[str] --- see `keyset_list_all`. Default
      `('user_uuid', 'target_uuid')`.

    Additional keyword arguments are passed through to `keyset_list_all`.

    """
    return keyset_list_all(fn, order_key, num_retries, ascending, key_fields, **kwargs)
261
def ca_certs_path(fallback: T=httplib2.CA_CERTS) -> Union[str, T]:
    """Return the path of the best available source of CA certificates

    This function walks a fixed list of candidate paths in priority order and
    returns the first one that exists on this system:

    * the path in the `SSL_CERT_FILE` environment variable (used by OpenSSL),
      skipped when the variable is unset or empty
    * `/etc/arvados/ca-certificates.crt`, respected by all Arvados software
    * `/etc/ssl/certs/ca-certificates.crt`, the default store on Debian-based
      distributions
    * `/etc/pki/tls/certs/ca-bundle.crt`, the default store on Red Hat-based
      distributions

    If none of these paths exist, this function returns the value of `fallback`.

    Arguments:

    * fallback: T --- The value to return if none of the known paths exist.
      The default value is the certificate store of Mozilla's trusted CAs
      included with the Python [certifi][] package.

    [certifi]: https://pypi.org/project/certifi/
    """
    candidates = (
        # SSL_CERT_FILE is an OpenSSL override. httplib2 itself also
        # supports HTTPLIB2_CA_CERTS, which is not consulted here.
        os.environ.get('SSL_CERT_FILE'),
        # Arvados specific:
        '/etc/arvados/ca-certificates.crt',
        # Debian:
        '/etc/ssl/certs/ca-certificates.crt',
        # Red Hat:
        '/etc/pki/tls/certs/ca-bundle.crt',
    )
    existing = (path for path in candidates if path and os.path.exists(path))
    return next(existing, fallback)
299
def new_request_id() -> str:
    """Return a random request ID

    This function generates and returns a random string suitable for use as an
    `X-Request-Id` header value in the Arvados API. The result is `req-`
    followed by 20 characters drawn from `0-9` and `a-z`.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    # 2**104 > 36**20 > 2**103, so 104 random bits cover all 20 base-36 digits.
    remaining = random.getrandbits(104)
    digits = []
    for _ in range(20):
        # Peel off the least significant base-36 digit first.
        remaining, index = divmod(remaining, 36)
        digits.append(alphabet[index])
    return "req-" + "".join(digits)
317
def get_config_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's configuration, with caching

    This function gets and returns the Arvados configuration from the API
    server. It caches the result on the client object (as the
    `_cached_config` attribute) and reuses it on any future calls.

    If the client's discovery document does not list a `configs` resource,
    this function returns an empty dictionary without making an API call.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster configuration.
    """
    # Tolerate a discovery document with no 'resources' section at all
    # instead of failing with AttributeError on None.
    resources = svc._rootDesc.get('resources') or {}
    if not resources.get('configs', False):
        # Old API server version, no config export endpoint
        return {}
    if not hasattr(svc, '_cached_config'):
        svc._cached_config = svc.configs().get().execute()
    return svc._cached_config
336
def get_vocabulary_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's vocabulary, with caching

    This function gets and returns the Arvados vocabulary from the API
    server. It caches the result on the client object (as the
    `_cached_vocabulary` attribute) and reuses it on any future calls.

    If the client's discovery document does not list a `vocabularies`
    resource, this function returns an empty dictionary without making an
    API call.

    .. HINT:: Low-level method
       This is a relatively low-level wrapper around the Arvados API. Most
       users will prefer to use `arvados.vocabulary.load_vocabulary`.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster vocabulary.
    """
    # Tolerate a discovery document with no 'resources' section at all
    # instead of failing with AttributeError on None.
    resources = svc._rootDesc.get('resources') or {}
    if not resources.get('vocabularies', False):
        # Old API server version, no vocabulary export endpoint
        return {}
    if not hasattr(svc, '_cached_vocabulary'):
        svc._cached_vocabulary = svc.vocabularies().get().execute()
    return svc._cached_vocabulary
359
def trim_name(collectionname: str) -> str:
    """Limit the length of a name to fit within Arvados API limits

    This function ensures that a string is short enough to use as an object
    name in the Arvados API, leaving room for text that may be added by the
    `ensure_unique_name` argument. Names of up to 226 characters (254 - 28)
    are returned unchanged. Longer names have their excess characters cut
    out of the middle and replaced with a single ellipsis character, so the
    result is 227 characters long.

    Arguments:

    * collectionname: str --- The desired source name
    """
    max_name_len = 254 - 28
    excess = len(collectionname) - max_name_len
    if excess <= 0:
        return collectionname

    # Keep the first half of the budget, drop `excess` characters after it,
    # and keep everything that follows.
    head_len = max_name_len // 2
    head = collectionname[:head_len]
    tail = collectionname[head_len + excess:]
    return "".join((head, "…", tail))
382
def iter_storage_classes(
        config: Dict[str, Any],
        check: Callable[[Dict[str, Any]], bool]=operator.methodcaller('get', 'Default'),
        fallback: str="default",
) -> Iterator[str]:
    """Read storage classes from the API client config

    This generator yields the name of every entry under
    `config["StorageClasses"]` whose value passes `check`, in the order the
    mapping provides them. If nothing matched and `fallback` is truthy,
    `fallback` is yielded as the only item.

    Arguments:

    * config: Dict[str, Any] --- The configuration to read. A missing
      `"StorageClasses"` key is treated as an empty mapping.

    * check: Callable[[Dict[str, Any]], bool] --- Called with each storage
      class's value; the class name is yielded when the result is truthy.
      The default calls `.get('Default')` on the value.

    * fallback: str --- The name to yield when no storage class passes
      `check`. Default `'default'`.
    """
    storage_classes = config.get("StorageClasses", {})
    matched = False
    for name in (key for key, value in storage_classes.items() if check(value)):
        matched = True
        yield name
    if not matched and fallback:
        yield fallback
HEX_RE = re.compile('^[0-9a-fA-F]+$')

Regular expression to match a hexadecimal string (case-insensitive)

CR_UNCOMMITTED = 'Uncommitted'

Constant state value for uncommitted container requests

CR_COMMITTED = 'Committed'

Constant state value for committed container requests

CR_FINAL = 'Final'

Constant state value for finalized container requests

keep_locator_pattern = re.compile('[0-9a-f]{32}\\+[0-9]+(\\+\\S+)*')

Regular expression to match any Keep block locator

signed_locator_pattern = re.compile('[0-9a-f]{32}\\+[0-9]+(\\+\\S+)*\\+A\\S+(\\+\\S+)*')

Regular expression to match any Keep block locator with an access token hint

portable_data_hash_pattern = re.compile('[0-9a-f]{32}\\+[0-9]+')

Regular expression to match any collection portable data hash

manifest_pattern = re.compile('((\\S+)( +[a-f0-9]{32}(\\+[0-9]+)(\\+\\S+)*)+( +[0-9]+:[0-9]+:\\S+)+$)+', re.MULTILINE)

Regular expression to match an Arvados collection manifest text

keep_file_locator_pattern = re.compile('([0-9a-f]{32}\\+[0-9]+)/(.*)')

Regular expression to match a file path from a collection identified by portable data hash

keepuri_pattern = re.compile('keep:([0-9a-f]{32}\\+[0-9]+)/(.*)')

Regular expression to match a keep: URI with a collection identified by portable data hash

uuid_pattern = re.compile('[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')

Regular expression to match any Arvados object UUID

collection_uuid_pattern = re.compile('[a-z0-9]{5}-4zz18-[a-z0-9]{15}')

Regular expression to match any Arvados collection UUID

container_uuid_pattern = re.compile('[a-z0-9]{5}-dz642-[a-z0-9]{15}')

Regular expression to match any Arvados container UUID

group_uuid_pattern = re.compile('[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')

Regular expression to match any Arvados group UUID

user_uuid_pattern = re.compile('[a-z0-9]{5}-tpzed-[a-z0-9]{15}')

Regular expression to match any Arvados user UUID

def is_hex(s: str, *length_args: int) -> bool:
 72def is_hex(s: str, *length_args: int) -> bool:
 73    """Indicate whether a string is a hexadecimal number
 74
 75    This method returns true if all characters in the string are hexadecimal
 76    digits. It is case-insensitive.
 77
 78    You can also pass optional length arguments to check that the string has
 79    the expected number of digits. If you pass one integer, the string must
 80    have that length exactly, otherwise the method returns False. If you
 81    pass two integers, the string's length must fall within that minimum and
 82    maximum (inclusive), otherwise the method returns False.
 83
 84    Arguments:
 85
 86    * s: str --- The string to check
 87
 88    * length_args: int --- Optional length limit(s) for the string to check
 89    """
 90    num_length_args = len(length_args)
 91    if num_length_args > 2:
 92        raise arvados.errors.ArgumentError(
 93            "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
 94    elif num_length_args == 2:
 95        good_len = (length_args[0] <= len(s) <= length_args[1])
 96    elif num_length_args == 1:
 97        good_len = (len(s) == length_args[0])
 98    else:
 99        good_len = True
100    return bool(good_len and HEX_RE.match(s))

Indicate whether a string is a hexadecimal number

This method returns true if all characters in the string are hexadecimal digits. It is case-insensitive.

You can also pass optional length arguments to check that the string has the expected number of digits. If you pass one integer, the string must have that length exactly, otherwise the method returns False. If you pass two integers, the string’s length must fall within that minimum and maximum (inclusive), otherwise the method returns False.

Arguments:

  • s: str — The string to check

  • length_args: int — Optional length limit(s) for the string to check

def keyset_list_all( fn: Callable[..., arvados.api_resources.ArvadosAPIRequest], order_key: str = 'created_at', num_retries: int = 0, ascending: bool = True, key_fields: Container[str] = ('uuid',), **kwargs: Any) -> Iterator[Dict[str, Any]]:
def keyset_list_all(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str="created_at",
        num_retries: int=0,
        ascending: bool=True,
        key_fields: Container[str]=('uuid',),
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all Arvados resources from an API list call

    This method takes a method that represents an Arvados API list call, and
    iterates the objects returned by the API server. It can make multiple API
    calls to retrieve and iterate all objects available from the API server.
    Each object is yielded once, in the order the server returns it.

    This is a generator: no API call is made, and no argument is validated,
    until iteration starts. `arvados.errors.ArgumentError` is raised at that
    point if `key_fields` has more than one entry that is not `order_key`.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] --- A
      function that wraps an Arvados API method that returns a list of
      objects. If you have an Arvados API client named `arv`, examples
      include `arv.collections().list` and `arv.groups().contents`. Note
      that you should pass the function *without* calling it.

    * order_key: str --- The name of the primary object field that objects
      should be sorted by. This name is used to build an `order` argument
      for `fn`. Default `'created_at'`.

    * num_retries: int --- This argument is passed through to
      `arvados.api_resources.ArvadosAPIRequest.execute` for each API call. See
      that method's docstring for details. Default 0 (meaning API calls will
      use the `num_retries` value set when the Arvados API client was
      constructed).

    * ascending: bool --- Used to build an `order` argument for `fn`. If True,
      all fields will be sorted in `'asc'` (ascending) order. Otherwise, all
      fields will be sorted in `'desc'` (descending) order.

    * key_fields: Container[str] --- One or two fields that constitute
      a unique key for returned items.  Normally this should be the
      default value `('uuid',)`, unless `fn` returns
      computed_permissions records, in which case it should be
      `('user_uuid', 'target_uuid')`.  If two fields are given, one of
      them must be equal to `order_key`.

    Additional keyword arguments will be passed directly to `fn` for each API
    call. Note that this function sets `count`, `limit`, and `order` as part of
    its work. A `filters` argument is combined with the paging filters, and a
    `select` argument is extended with the fields needed for paging.

    """
    tiebreak_keys = set(key_fields) - {order_key}
    if len(tiebreak_keys) == 0:
        tiebreak_key = 'uuid'
    elif len(tiebreak_keys) == 1:
        tiebreak_key = tiebreak_keys.pop()
    else:
        raise arvados.errors.ArgumentError(
            "key_fields can have at most one entry that is not order_key")

    pagesize = 1000
    kwargs["limit"] = pagesize
    kwargs["count"] = 'none'
    asc = "asc" if ascending else "desc"
    kwargs["order"] = [f"{order_key} {asc}", f"{tiebreak_key} {asc}"]
    # Copy into a list so any sequence of filters can be concatenated with
    # the paging filters below.
    other_filters = list(kwargs.get("filters") or [])

    if 'select' in kwargs:
        # Paging reads order_key, tiebreak_key and key_fields from every
        # item, so they must all be selected. tiebreak_key is listed
        # explicitly because it defaults to 'uuid' even when 'uuid' is not
        # one of key_fields.
        kwargs['select'] = list({*kwargs['select'], *key_fields, order_key, tiebreak_key})

    nextpage = []
    key_getter = operator.itemgetter(*key_fields)
    seen_prevpage = set()
    seen_thispage = set()
    lastitem = None
    prev_page_all_same_order_key = False

    while True:
        kwargs["filters"] = nextpage+other_filters
        page = fn(**kwargs).execute(num_retries=num_retries)["items"]

        if len(page) == 0:
            if prev_page_all_same_order_key:
                # The run of items sharing lastitem's order key is
                # exhausted; move on to strictly later order keys.
                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
                prev_page_all_same_order_key = False
                continue
            else:
                return

        seen_prevpage = seen_thispage
        seen_thispage = set()

        for i in page:
            # In cases where there's more than one record with the
            # same order key, the result could include records we
            # already saw in the last page.  Skip them.
            seen_key = key_getter(i)
            if seen_key in seen_prevpage:
                continue
            seen_thispage.add(seen_key)
            yield i

        firstitem = page[0]
        # Only advance the paging anchor when the page ended on a new item.
        # If the final item was already yielded from the previous page, this
        # page holds nothing but repeats, all of which sort at or before the
        # previous anchor. Anchoring on such a repeat would make the next
        # request fetch the previous anchor again, after it has dropped out
        # of the "seen" set, and yield it a second time.
        if key_getter(page[-1]) not in seen_prevpage:
            lastitem = page[-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using tiebreak key for paging.
            nextpage = [[order_key, "=", lastitem[order_key]], [tiebreak_key, ">" if ascending else "<", lastitem[tiebreak_key]]]
            prev_page_all_same_order_key = True
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  If
            # there are multiple rows with the same order key it is
            # still likely we'll end up retrieving duplicate rows.
            # That's handled by tracking the "seen" rows for each page
            # so they can be skipped if they show up on the next page.
            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]]]
            if tiebreak_key == "uuid":
                nextpage += [[tiebreak_key, "!=", lastitem[tiebreak_key]]]
            prev_page_all_same_order_key = False

Iterate all Arvados resources from an API list call

This method takes a method that represents an Arvados API list call, and iterates the objects returned by the API server. It can make multiple API calls to retrieve and iterate all objects available from the API server.

Arguments:

  • fn: Callable[…, arvados.api_resources.ArvadosAPIRequest] — A function that wraps an Arvados API method that returns a list of objects. If you have an Arvados API client named arv, examples include arv.collections().list and arv.groups().contents. Note that you should pass the function without calling it.

  • order_key: str — The name of the primary object field that objects should be sorted by. This name is used to build an order argument for fn. Default 'created_at'.

  • num_retries: int — This argument is passed through to arvados.api_resources.ArvadosAPIRequest.execute for each API call. See that method’s docstring for details. Default 0 (meaning API calls will use the num_retries value set when the Arvados API client was constructed).

  • ascending: bool — Used to build an order argument for fn. If True, all fields will be sorted in 'asc' (ascending) order. Otherwise, all fields will be sorted in 'desc' (descending) order.

  • key_fields: Container[str] — One or two fields that constitute a unique key for returned items. Normally this should be the default value ('uuid',), unless fn returns computed_permissions records, in which case it should be ('user_uuid', 'target_uuid'). If two fields are given, one of them must be equal to order_key.

Additional keyword arguments will be passed directly to fn for each API call. Note that this function sets count, limit, and order as part of its work.

def iter_computed_permissions( fn: Callable[..., arvados.api_resources.ArvadosAPIRequest], order_key: str = 'user_uuid', num_retries: int = 0, ascending: bool = True, key_fields: Container[str] = ('user_uuid', 'target_uuid'), **kwargs: Any) -> Iterator[Dict[str, Any]]:
def iter_computed_permissions(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str='user_uuid',
        num_retries: int=0,
        ascending: bool=True,
        key_fields: Container[str]=('user_uuid', 'target_uuid'),
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all `computed_permission` resources

    This is a thin wrapper around `keyset_list_all`: every argument is
    forwarded unchanged, and only the defaults differ so that they suit the
    computed_permissions API.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] ---
      see `keyset_list_all`.  Typically this is an instance of
      `arvados.api_resources.ComputedPermissions.list`.  Given an
      Arvados API client named `arv`, typical usage is
      `iter_computed_permissions(arv.computed_permissions().list)`.

    * order_key: str --- see `keyset_list_all`.  Default
      `'user_uuid'`.

    * num_retries: int --- see `keyset_list_all`.

    * ascending: bool --- see `keyset_list_all`.

    * key_fields: Container[str] --- see `keyset_list_all`. Default
      `('user_uuid', 'target_uuid')`.

    Additional keyword arguments are passed through to `keyset_list_all`.

    """
    return keyset_list_all(fn, order_key, num_retries, ascending, key_fields, **kwargs)

Iterate all computed_permission resources

This method is the same as keyset_list_all, except that its default arguments are suitable for the computed_permissions API.

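Arguments:

  • fn: Callable[…, arvados.api_resources.ArvadosAPIRequest] — see keyset_list_all. Typically this is an instance of arvados.api_resources.ComputedPermissions.list. Given an Arvados API client named arv, typical usage is iter_computed_permissions(arv.computed_permissions().list).

  • order_key: str — see keyset_list_all. Default 'user_uuid'.

  • num_retries: int — see keyset_list_all.

  • ascending: bool — see keyset_list_all.

  • key_fields: Container[str] — see keyset_list_all. Default ('user_uuid', 'target_uuid').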

def ca_certs_path( fallback: ~T = '/var/lib/gitolite3/venv-pdoc/lib/python3.9/site-packages/certifi/cacert.pem') -> Union[str, ~T]:
def ca_certs_path(fallback: T=httplib2.CA_CERTS) -> Union[str, T]:
    """Return the path of the best available source of CA certificates

    This function walks a fixed list of candidate paths in priority order and
    returns the first one that exists on this system:

    * the path in the `SSL_CERT_FILE` environment variable (used by OpenSSL),
      skipped when the variable is unset or empty
    * `/etc/arvados/ca-certificates.crt`, respected by all Arvados software
    * `/etc/ssl/certs/ca-certificates.crt`, the default store on Debian-based
      distributions
    * `/etc/pki/tls/certs/ca-bundle.crt`, the default store on Red Hat-based
      distributions

    If none of these paths exist, this function returns the value of `fallback`.

    Arguments:

    * fallback: T --- The value to return if none of the known paths exist.
      The default value is the certificate store of Mozilla's trusted CAs
      included with the Python [certifi][] package.

    [certifi]: https://pypi.org/project/certifi/
    """
    candidates = (
        # SSL_CERT_FILE is an OpenSSL override. httplib2 itself also
        # supports HTTPLIB2_CA_CERTS, which is not consulted here.
        os.environ.get('SSL_CERT_FILE'),
        # Arvados specific:
        '/etc/arvados/ca-certificates.crt',
        # Debian:
        '/etc/ssl/certs/ca-certificates.crt',
        # Red Hat:
        '/etc/pki/tls/certs/ca-bundle.crt',
    )
    existing = (path for path in candidates if path and os.path.exists(path))
    return next(existing, fallback)

Return the path of the best available source of CA certificates

This function checks various known paths that provide trusted CA certificates, and returns the first one that exists. It checks:

  • the path in the SSL_CERT_FILE environment variable (used by OpenSSL)
  • /etc/arvados/ca-certificates.crt, respected by all Arvados software
  • /etc/ssl/certs/ca-certificates.crt, the default store on Debian-based distributions
  • /etc/pki/tls/certs/ca-bundle.crt, the default store on Red Hat-based distributions

If none of these paths exist, this function returns the value of fallback.

Arguments:

  • fallback: T — The value to return if none of the known paths exist. The default value is the certificate store of Mozilla’s trusted CAs included with the Python certifi package.
def new_request_id() -> str:
def new_request_id() -> str:
    """Generate a random request ID

    The result is the prefix `req-` followed by 20 characters drawn from the
    digits and lowercase ASCII letters. It is suitable for use as an
    `X-Request-Id` header value in the Arvados API.
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    # 104 random bits are just enough to cover every 20-digit base-36 value:
    # 2**104 > 36**20 > 2**103
    remaining = random.getrandbits(104)
    digits = []
    for _ in range(20):
        # Peel off base-36 digits, least significant first.
        remaining, index = divmod(remaining, 36)
        digits.append(alphabet[index])
    return 'req-' + ''.join(digits)

Return a random request ID

This function generates and returns a random string suitable for use as an X-Request-Id header value in the Arvados API.

def get_config_once(svc: arvados.api_resources.ArvadosAPIClient) -> Dict[str, Any]:
def get_config_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's configuration, with caching

    This function gets and returns the Arvados configuration from the API
    server. It caches the result on the client object and reuses it on any
    future calls.

    If the client's API description (`svc._rootDesc`) does not list a
    `configs` resource, this function returns an empty dictionary without
    making a request and without caching anything.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster configuration.
    """
    # `resources` may be absent or null in the API description. Treat that
    # the same as a server without the endpoint, rather than letting the
    # chained lookup raise AttributeError on None.
    resources = svc._rootDesc.get('resources') or {}
    if not resources.get('configs', False):
        # Old API server version, no config export endpoint
        return {}
    if not hasattr(svc, '_cached_config'):
        # First call for this client: fetch once and remember the result.
        svc._cached_config = svc.configs().get().execute()
    return svc._cached_config

Return an Arvados cluster’s configuration, with caching

This function gets and returns the Arvados configuration from the API server. It caches the result on the client object and reuses it on any future calls.

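Arguments:

  • svc: arvados.api_resources.ArvadosAPIClient — The Arvados API client object to use to retrieve and cache the Arvados cluster configuration.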

def get_vocabulary_once(svc: arvados.api_resources.ArvadosAPIClient) -> Dict[str, Any]:
def get_vocabulary_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's vocabulary, with caching

    This function gets and returns the Arvados vocabulary from the API
    server. It caches the result on the client object and reuses it on any
    future calls.

    If the client's API description (`svc._rootDesc`) does not list a
    `vocabularies` resource, this function returns an empty dictionary
    without making a request and without caching anything.

    .. HINT:: Low-level method
       This is a relatively low-level wrapper around the Arvados API. Most
       users will prefer to use `arvados.vocabulary.load_vocabulary`.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster vocabulary.
    """
    # `resources` may be absent or null in the API description. Treat that
    # the same as a server without the endpoint, rather than letting the
    # chained lookup raise AttributeError on None.
    resources = svc._rootDesc.get('resources') or {}
    if not resources.get('vocabularies', False):
        # Old API server version, no vocabulary export endpoint
        return {}
    if not hasattr(svc, '_cached_vocabulary'):
        # First call for this client: fetch once and remember the result.
        svc._cached_vocabulary = svc.vocabularies().get().execute()
    return svc._cached_vocabulary

Return an Arvados cluster’s vocabulary, with caching

This function gets and returns the Arvados vocabulary from the API server. It caches the result on the client object and reuses it on any future calls.

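Arguments:

  • svc: arvados.api_resources.ArvadosAPIClient — The Arvados API client object to use to retrieve and cache the Arvados cluster vocabulary.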

def trim_name(collectionname: str) -> str:
def trim_name(collectionname: str) -> str:
    """Shorten a name so it fits within Arvados API limits

    Names of up to 226 characters (`254 - 28`, which leaves room for text
    that may be added by the `ensure_unique_name` argument) are returned
    unchanged. For longer names, the excess characters are cut out of the
    middle of the string and an ellipsis is inserted where they were. The
    kept head and tail together total 226 characters, plus the ellipsis.

    Arguments:

    * collectionname: str --- The desired source name
    """
    limit = 254 - 28
    excess = len(collectionname) - limit
    if excess <= 0:
        # Already short enough: nothing to trim.
        return collectionname

    # Keep the first half of the budget from the front of the name, skip
    # `excess` characters, and keep everything after that.
    head_len = limit // 2
    head = collectionname[:head_len]
    tail = collectionname[head_len + excess:]
    return head + "…" + tail

Limit the length of a name to fit within Arvados API limits

This function ensures that a string is short enough to use as an object name in the Arvados API, leaving room for text that may be added by the ensure_unique_name argument. If the source name is short enough, it is returned unchanged. Otherwise, this function returns a string with excess characters removed from the middle of the source string and replaced with an ellipsis.

Arguments:

  • collectionname: str — The desired source name
def iter_storage_classes( config: Dict[str, Any], check: Callable[[Dict[str, Any]], bool] = operator.methodcaller('get', 'Default'), fallback: str = 'default') -> Iterator[str]:
def iter_storage_classes(
        config: Dict[str, Any],
        check: Callable[[Dict[str, Any]], bool] = operator.methodcaller('get', 'Default'),
        fallback: str = "default",
) -> Iterator[str]:
    """Iterate the names of storage classes in the API client config

    This generator looks at each entry under the `StorageClasses` key of
    `config` and yields the name of every entry whose value passes `check`.
    If nothing passes and `fallback` is a non-empty value, `fallback` is
    yielded instead.

    Arguments:

    * config: Dict[str, Any] --- The configuration to read. A missing
      `StorageClasses` key is treated as having no storage classes.

    * check: Callable[[Dict[str, Any]], bool] --- Called with the value of
      each storage class entry; the entry's name is yielded when the result
      is true. The default calls `.get('Default')` on the value.

    * fallback: str --- The value to yield when no entry passes `check`.
      A false value (such as an empty string) disables the fallback. The
      default is `'default'`.
    """
    matching_names = (
        name
        for name, settings in config.get("StorageClasses", {}).items()
        if check(settings)
    )
    # Counting from 1 leaves `match_count` at 0 only if nothing matched.
    match_count = 0
    for match_count, name in enumerate(matching_names, 1):
        yield name
    if fallback and not match_count:
        yield fallback

Read storage classes from the API client config

This function iterates storage class names for classes in config that pass check. If no matches are found but fallback is given, it is yielded.