arvados.util

Arvados utilities

This module provides functions and constants that are useful across a variety of Arvados resource types, or extend the Arvados API client (see arvados.api).

  1# Copyright (C) The Arvados Authors. All rights reserved.
  2#
  3# SPDX-License-Identifier: Apache-2.0
  4"""Arvados utilities
  5
  6This module provides functions and constants that are useful across a variety
  7of Arvados resource types, or extend the Arvados API client (see `arvados.api`).
  8"""
  9
 10import errno
 11import fcntl
 12import functools
 13import hashlib
 14import httplib2
 15import os
 16import random
 17import re
 18import subprocess
 19import sys
 20import warnings
 21
 22import arvados.errors
 23
 24from typing import (
 25    Any,
 26    Callable,
 27    Dict,
 28    Iterator,
 29    TypeVar,
 30    Union,
 31)
 32
T = TypeVar('T')

HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
"""Regular expression to match a hexadecimal string (case-insensitive)"""
CR_UNCOMMITTED = 'Uncommitted'
"""Constant `state` value for uncommitted container requests"""
CR_COMMITTED = 'Committed'
"""Constant `state` value for committed container requests"""
CR_FINAL = 'Final'
"""Constant `state` value for finalized container requests"""

keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*')
"""Regular expression to match any Keep block locator"""
signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*\+A\S+(\+\S+)*')
"""Regular expression to match any Keep block locator with an access token hint"""
portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+')
"""Regular expression to match any collection portable data hash"""
manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+[0-9]+)(\+\S+)*)+( +[0-9]+:[0-9]+:\S+)+$)+', flags=re.MULTILINE)
"""Regular expression to match an Arvados collection manifest text"""
keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+[0-9]+)/(.*)')
"""Regular expression to match a file path from a collection identified by portable data hash"""
keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+[0-9]+)/(.*)')
"""Regular expression to match a `keep:` URI with a collection identified by portable data hash"""

uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
"""Regular expression to match any Arvados object UUID"""
collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
"""Regular expression to match any Arvados collection UUID"""
container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
"""Regular expression to match any Arvados container UUID"""
group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
"""Regular expression to match any Arvados group UUID"""
link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
"""Regular expression to match any Arvados link UUID"""
user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
"""Regular expression to match any Arvados user UUID"""
job_uuid_pattern = re.compile(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}')
"""Regular expression to match any Arvados job UUID

.. WARNING:: Deprecated
   Arvados job resources are deprecated and will be removed in a future
   release. Prefer the containers API instead.
"""
 76
def _deprecated(version=None, preferred=None):
    """Mark a callable as deprecated in the SDK

    This will wrap the callable to emit as a DeprecationWarning
    and add a deprecation notice to its docstring.

    If the following arguments are given, they'll be included in the
    notices:

    * preferred: str | None --- The name of an alternative that users should
      use instead.

    * version: str | None --- The version of Arvados when the callable is
      scheduled to be removed.
    """
    # Pre-render the optional message fragments so the decorator body
    # only has to concatenate strings.
    if version is None:
        version = ''
    else:
        version = f' and scheduled to be removed in Arvados {version}'
    if preferred is None:
        preferred = ''
    else:
        preferred = f' Prefer {preferred} instead.'
    def deprecated_decorator(func):
        fullname = f'{func.__module__}.{func.__qualname__}'
        parent, _, name = fullname.rpartition('.')
        # When decorating a constructor, report the class as deprecated
        # rather than its __init__ method.
        if name == '__init__':
            fullname = parent
        warning_msg = f'{fullname} is deprecated{version}.{preferred}'
        @functools.wraps(func)
        def deprecated_wrapper(*args, **kwargs):
            # stacklevel=2 attributes the warning to the caller's line,
            # not to this wrapper.
            warnings.warn(warning_msg, DeprecationWarning, 2)
            return func(*args, **kwargs)
        # Get func's docstring without any trailing newline or empty lines.
        func_doc = re.sub(r'\n\s*$', '', func.__doc__ or '')
        # Infer the docstring's body indentation from its first indented line.
        match = re.search(r'\n([ \t]+)\S', func_doc)
        indent = '' if match is None else match.group(1)
        warning_doc = f'\n\n{indent}.. WARNING:: Deprecated\n{indent}   {warning_msg}'
        # Make the deprecation notice the second "paragraph" of the
        # docstring if possible. Otherwise append it.
        docstring, count = re.subn(
            rf'\n[ \t]*\n{indent}',
            f'{warning_doc}\n\n{indent}',
            func_doc,
            count=1,
        )
        if not count:
            docstring = f'{func_doc.lstrip()}{warning_doc}'
        deprecated_wrapper.__doc__ = docstring
        return deprecated_wrapper
    return deprecated_decorator
128
def is_hex(s: str, *length_args: int) -> bool:
    """Indicate whether a string is a hexadecimal number

    This method returns true if all characters in the string are hexadecimal
    digits. It is case-insensitive.

    You can also pass optional length arguments to check that the string has
    the expected number of digits. If you pass one integer, the string must
    have that length exactly, otherwise the method returns False. If you
    pass two integers, the string's length must fall within that minimum and
    maximum (inclusive), otherwise the method returns False.

    Arguments:

    * s: str --- The string to check

    * length_args: int --- Optional length limit(s) for the string to check
    """
    num_length_args = len(length_args)
    if num_length_args > 2:
        raise arvados.errors.ArgumentError(
            "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
    if num_length_args == 2:
        minimum, maximum = length_args
        good_len = minimum <= len(s) <= maximum
    elif num_length_args == 1:
        good_len = len(s) == length_args[0]
    else:
        good_len = True
    return bool(good_len and HEX_RE.match(s))
158
def keyset_list_all(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str="created_at",
        num_retries: int=0,
        ascending: bool=True,
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all Arvados resources from an API list call

    This method takes a method that represents an Arvados API list call, and
    iterates the objects returned by the API server. It can make multiple API
    calls to retrieve and iterate all objects available from the API server.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] --- A
      function that wraps an Arvados API method that returns a list of
      objects. If you have an Arvados API client named `arv`, examples
      include `arv.collections().list` and `arv.groups().contents`. Note
      that you should pass the function *without* calling it.

    * order_key: str --- The name of the primary object field that objects
      should be sorted by. This name is used to build an `order` argument
      for `fn`. Default `'created_at'`.

    * num_retries: int --- This argument is passed through to
      `arvados.api_resources.ArvadosAPIRequest.execute` for each API call. See
      that method's docstring for details. Default 0 (meaning API calls will
      use the `num_retries` value set when the Arvados API client was
      constructed).

    * ascending: bool --- Used to build an `order` argument for `fn`. If True,
      all fields will be sorted in `'asc'` (ascending) order. Otherwise, all
      fields will be sorted in `'desc'` (descending) order.

    Additional keyword arguments will be passed directly to `fn` for each API
    call. Note that this function sets `count`, `limit`, and `order` as part of
    its work.
    """
    pagesize = 1000
    kwargs["limit"] = pagesize
    # count='none' skips the expensive items_available tally; paging is
    # driven entirely by filters below.
    kwargs["count"] = 'none'
    asc = "asc" if ascending else "desc"
    # uuid is used as a secondary sort key so that rows sharing the same
    # order_key value still have a stable, pageable order.
    kwargs["order"] = ["%s %s" % (order_key, asc), "uuid %s" % asc]
    other_filters = kwargs.get("filters", [])

    try:
        select = set(kwargs['select'])
    except KeyError:
        pass
    else:
        # Paging reads order_key and uuid from each row, so make sure any
        # caller-supplied select list includes them.
        select.add(order_key)
        select.add('uuid')
        kwargs['select'] = list(select)

    nextpage = []
    tot = 0
    expect_full_page = True
    seen_prevpage = set()
    seen_thispage = set()
    lastitem = None
    prev_page_all_same_order_key = False

    while True:
        kwargs["filters"] = nextpage+other_filters
        items = fn(**kwargs).execute(num_retries=num_retries)

        if len(items["items"]) == 0:
            if prev_page_all_same_order_key:
                # We were paging through rows tied on order_key using uuid;
                # that set is exhausted, so move past the tied value and
                # resume normal order_key paging.
                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
                prev_page_all_same_order_key = False
                continue
            else:
                # Genuinely no more rows.
                return

        seen_prevpage = seen_thispage
        seen_thispage = set()

        for i in items["items"]:
            # In cases where there's more than one record with the
            # same order key, the result could include records we
            # already saw in the last page.  Skip them.
            if i["uuid"] in seen_prevpage:
                continue
            seen_thispage.add(i["uuid"])
            yield i

        firstitem = items["items"][0]
        lastitem = items["items"][-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using uuid for paging.
            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">" if ascending else "<", lastitem["uuid"]]]
            prev_page_all_same_order_key = True
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  If
            # there are multiple rows with the same order key it is
            # still likely we'll end up retrieving duplicate rows.
            # That's handled by tracking the "seen" rows for each page
            # so they can be skipped if they show up on the next page.
            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
            prev_page_all_same_order_key = False
263
def ca_certs_path(fallback: T=httplib2.CA_CERTS) -> Union[str, T]:
    """Return the path of the best available source of CA certificates

    This function checks various known paths that provide trusted CA
    certificates, and returns the first one that exists. It checks:

    * the path in the `SSL_CERT_FILE` environment variable (used by OpenSSL)
    * `/etc/arvados/ca-certificates.crt`, respected by all Arvados software
    * `/etc/ssl/certs/ca-certificates.crt`, the default store on Debian-based
      distributions
    * `/etc/pki/tls/certs/ca-bundle.crt`, the default store on Red Hat-based
      distributions

    If none of these paths exist, this function returns the value of `fallback`.

    Arguments:

    * fallback: T --- The value to return if none of the known paths exist.
      The default value is the certificate store of Mozilla's trusted CAs
      included with the Python [certifi][] package.

    [certifi]: https://pypi.org/project/certifi/
    """
    # Note: don't reuse the function's own name for the loop variable
    # (the original shadowed `ca_certs_path`, which would make a
    # recursive or repeated reference inside the loop surprising).
    for candidate in [
        # SSL_CERT_FILE and SSL_CERT_DIR are openssl overrides - note
        # that httplib2 itself also supports HTTPLIB2_CA_CERTS.
        os.environ.get('SSL_CERT_FILE'),
        # Arvados specific:
        '/etc/arvados/ca-certificates.crt',
        # Debian:
        '/etc/ssl/certs/ca-certificates.crt',
        # Red Hat:
        '/etc/pki/tls/certs/ca-bundle.crt',
        ]:
        # environ.get may return None when SSL_CERT_FILE is unset; skip it.
        if candidate and os.path.exists(candidate):
            return candidate
    return fallback
301
def new_request_id() -> str:
    """Return a random request ID

    This function generates and returns a random string suitable for use as a
    `X-Request-Id` header value in the Arvados API.
    """
    digits = '0123456789abcdefghijklmnopqrstuvwxyz'
    # 2**104 > 36**20 > 2**103, so 104 random bits cover 20 base-36 digits.
    n = random.getrandbits(104)
    chars = []
    for _ in range(20):
        n, d = divmod(n, 36)
        chars.append(digits[d])
    return 'req-' + ''.join(chars)
319
def get_config_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's configuration, with caching

    This function gets and returns the Arvados configuration from the API
    server. It caches the result on the client object and reuses it on any
    future calls.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster configuration.
    """
    resources = svc._rootDesc.get('resources')
    if not resources.get('configs', False):
        # Old API server version without the config export endpoint.
        return {}
    cached = getattr(svc, '_cached_config', None)
    if cached is None:
        cached = svc.configs().get().execute()
        svc._cached_config = cached
    return cached
338
def get_vocabulary_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's vocabulary, with caching

    This function gets and returns the Arvados vocabulary from the API
    server. It caches the result on the client object and reuses it on any
    future calls.

    .. HINT:: Low-level method
       This is a relatively low-level wrapper around the Arvados API. Most
       users will prefer to use `arvados.vocabulary.load_vocabulary`.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster vocabulary.
    """
    resources = svc._rootDesc.get('resources')
    if not resources.get('vocabularies', False):
        # Old API server version without the vocabulary export endpoint.
        return {}
    cached = getattr(svc, '_cached_vocabulary', None)
    if cached is None:
        cached = svc.vocabularies().get().execute()
        svc._cached_vocabulary = cached
    return cached
361
def trim_name(collectionname: str) -> str:
    """Limit the length of a name to fit within Arvados API limits

    This function ensures that a string is short enough to use as an object
    name in the Arvados API, leaving room for text that may be added by the
    `ensure_unique_name` argument. If the source name is short enough, it is
    returned unchanged. Otherwise, this function returns a string with excess
    characters removed from the middle of the source string and replaced with
    an ellipsis.

    Arguments:

    * collectionname: str --- The desired source name
    """
    # 254 is the API's name length limit; reserve 28 characters for the
    # suffix ensure_unique_name may append.
    max_name_len = 254 - 28
    if len(collectionname) <= max_name_len:
        return collectionname
    excess = len(collectionname) - max_name_len
    half = int(max_name_len / 2)
    return collectionname[:half] + "…" + collectionname[half + excess:]
384
@_deprecated('3.0', 'arvados.util.keyset_list_all')
def list_all(fn, num_retries=0, **kwargs):
    """Return every item from an Arvados API list call as one list.

    Pages through `fn` using offset-based paging until all
    `items_available` have been collected.
    """
    # Default limit to (effectively) api server's MAX_LIMIT
    kwargs.setdefault('limit', sys.maxsize)
    results = []
    offset = 0
    available = sys.maxsize
    while len(results) < available:
        page = fn(offset=offset, **kwargs).execute(num_retries=num_retries)
        results.extend(page['items'])
        available = page['items_available']
        offset = page['offset'] + len(page['items'])
    return results
398
@_deprecated('3.0')
def clear_tmpdir(path=None):
    """
    Ensure the given directory (or TASK_TMPDIR if none given)
    exists and is empty.
    """
    from arvados import current_task
    if path is None:
        path = current_task().tmpdir
    if os.path.exists(path):
        # Capture stderr so a failure message can include the actual
        # reason. (Previously stderr was not piped, so the exception
        # message always interpolated None.)
        p = subprocess.Popen(['rm', '-rf', path], stderr=subprocess.PIPE)
        stdout, stderr = p.communicate(None)
        if p.returncode != 0:
            raise Exception('rm -rf %s: %s' % (path, stderr))
    os.mkdir(path)
414
@_deprecated('3.0', 'subprocess.run')
def run_command(execargs, **kwargs):
    """Run an external command and return its (stdout, stderr) output.

    Raises arvados.errors.CommandFailedError if the command exits with
    a nonzero status.
    """
    defaults = {
        'stdin': subprocess.PIPE,
        'stdout': subprocess.PIPE,
        'stderr': sys.stderr,
        'close_fds': True,
        'shell': False,
    }
    for key, value in defaults.items():
        kwargs.setdefault(key, value)
    proc = subprocess.Popen(execargs, **kwargs)
    stdoutdata, stderrdata = proc.communicate(None)
    if proc.returncode != 0:
        raise arvados.errors.CommandFailedError(
            "run_command %s exit %d:\n%s" %
            (execargs, proc.returncode, stderrdata))
    return stdoutdata, stderrdata
429
@_deprecated('3.0')
def git_checkout(url, version, path):
    """Clone `url` into `path` (if not already present) and check out `version`.

    A relative `path` is interpreted against the current job's tmpdir.
    Returns the absolute checkout path.
    """
    from arvados import current_job
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    if not os.path.exists(path):
        run_command(
            ["git", "clone", url, path],
            cwd=os.path.dirname(path),
        )
    run_command(["git", "checkout", version], cwd=path)
    return path
441
@_deprecated('3.0')
def tar_extractor(path, decompress_flag):
    """Start a tar subprocess that extracts its stdin into `path`.

    `decompress_flag` is a tar compression flag character ('j', 'z',
    or '' for no compression). Returns the Popen object; write archive
    data to its stdin.
    """
    command = ["tar", "-C", path, ("-x%sf" % decompress_flag), "-"]
    return subprocess.Popen(
        command,
        stdout=None,
        stdin=subprocess.PIPE,
        stderr=sys.stderr,
        shell=False,
        close_fds=True,
    )
451
@_deprecated('3.0', 'arvados.collection.Collection.open and the tarfile module')
def tarball_extract(tarball, path):
    """Retrieve a tarball from Keep and extract it to a local
    directory.  Return the absolute path where the tarball was
    extracted. If the top level of the tarball contained just one
    file or directory, return the absolute path of that single
    item.

    tarball -- collection locator
    path -- where to extract the tarball: absolute, or relative to job tmp
    """
    from arvados import current_job
    from arvados.collection import CollectionReader
    # Relative paths are interpreted against the current job's tmpdir.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    # Serialize concurrent extractions of the same path with a lock file.
    lockfile = open(path + '.lock', 'w')
    fcntl.flock(lockfile, fcntl.LOCK_EX)
    try:
        os.stat(path)
    except OSError:
        os.mkdir(path)
    already_have_it = False
    try:
        # The '.locator' symlink records which tarball was last
        # extracted here; if it matches, skip re-extraction.
        if os.readlink(os.path.join(path, '.locator')) == tarball:
            already_have_it = True
    except OSError:
        pass
    if not already_have_it:

        # emulate "rm -f" (i.e., if the file does not exist, we win)
        try:
            os.unlink(os.path.join(path, '.locator'))
        except OSError:
            if os.path.exists(os.path.join(path, '.locator')):
                os.unlink(os.path.join(path, '.locator'))

        for f in CollectionReader(tarball).all_files():
            # Choose tar's decompression flag from the file extension.
            f_name = f.name()
            if f_name.endswith(('.tbz', '.tar.bz2')):
                p = tar_extractor(path, 'j')
            elif f_name.endswith(('.tgz', '.tar.gz')):
                p = tar_extractor(path, 'z')
            elif f_name.endswith('.tar'):
                p = tar_extractor(path, '')
            else:
                raise arvados.errors.AssertionError(
                    "tarball_extract cannot handle filename %s" % f.name())
            # Stream the archive from Keep into tar's stdin in 1 MiB chunks.
            while True:
                buf = f.read(2**20)
                if len(buf) == 0:
                    break
                p.stdin.write(buf)
            p.stdin.close()
            p.wait()
            if p.returncode != 0:
                lockfile.close()
                raise arvados.errors.CommandFailedError(
                    "tar exited %d" % p.returncode)
        # Record the extracted locator for the "already have it" check above.
        os.symlink(tarball, os.path.join(path, '.locator'))
    tld_extracts = [f for f in os.listdir(path) if f != '.locator']
    lockfile.close()
    # If the tarball had exactly one top-level entry, return its path.
    if len(tld_extracts) == 1:
        return os.path.join(path, tld_extracts[0])
    return path
516
@_deprecated('3.0', 'arvados.collection.Collection.open and the zipfile module')
def zipball_extract(zipball, path):
    """Retrieve a zip archive from Keep and extract it to a local
    directory.  Return the absolute path where the archive was
    extracted. If the top level of the archive contained just one
    file or directory, return the absolute path of that single
    item.

    zipball -- collection locator
    path -- where to extract the archive: absolute, or relative to job tmp
    """
    from arvados import current_job
    from arvados.collection import CollectionReader
    # Relative paths are interpreted against the current job's tmpdir.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    # Serialize concurrent extractions of the same path with a lock file.
    lockfile = open(path + '.lock', 'w')
    fcntl.flock(lockfile, fcntl.LOCK_EX)
    try:
        os.stat(path)
    except OSError:
        os.mkdir(path)
    already_have_it = False
    try:
        # The '.locator' symlink records which archive was last
        # extracted here; if it matches, skip re-extraction.
        if os.readlink(os.path.join(path, '.locator')) == zipball:
            already_have_it = True
    except OSError:
        pass
    if not already_have_it:

        # emulate "rm -f" (i.e., if the file does not exist, we win)
        try:
            os.unlink(os.path.join(path, '.locator'))
        except OSError:
            if os.path.exists(os.path.join(path, '.locator')):
                os.unlink(os.path.join(path, '.locator'))

        for f in CollectionReader(zipball).all_files():
            if not f.name().endswith('.zip'):
                raise arvados.errors.NotImplementedError(
                    "zipball_extract cannot handle filename %s" % f.name())
            # Copy the zip data from Keep to a local file in 1 MiB chunks
            # so unzip(1) can read it, then extract and delete the copy.
            zip_filename = os.path.join(path, os.path.basename(f.name()))
            zip_file = open(zip_filename, 'wb')
            while True:
                buf = f.read(2**20)
                if len(buf) == 0:
                    break
                zip_file.write(buf)
            zip_file.close()

            p = subprocess.Popen(["unzip",
                                  "-q", "-o",
                                  "-d", path,
                                  zip_filename],
                                 stdout=None,
                                 stdin=None, stderr=sys.stderr,
                                 shell=False, close_fds=True)
            p.wait()
            if p.returncode != 0:
                lockfile.close()
                raise arvados.errors.CommandFailedError(
                    "unzip exited %d" % p.returncode)
            os.unlink(zip_filename)
        # Record the extracted locator for the "already have it" check above.
        os.symlink(zipball, os.path.join(path, '.locator'))
    tld_extracts = [f for f in os.listdir(path) if f != '.locator']
    lockfile.close()
    # If the archive had exactly one top-level entry, return its path.
    if len(tld_extracts) == 1:
        return os.path.join(path, tld_extracts[0])
    return path
585
@_deprecated('3.0', 'arvados.collection.Collection')
def collection_extract(collection, path, files=[], decompress=True):
    """Retrieve a collection from Keep and extract it to a local
    directory.  Return the absolute path where the collection was
    extracted.

    collection -- collection locator
    path -- where to extract: absolute, or relative to job tmp
    """
    from arvados import current_job
    from arvados.collection import CollectionReader
    # Use the bare hash part of a locator as the '.locator' marker value;
    # for anything else, fall back to an md5 of the whole string.
    matches = re.search(r'^([0-9a-f]+)(\+[\w@]+)*$', collection)
    if matches:
        collection_hash = matches.group(1)
    else:
        collection_hash = hashlib.md5(collection).hexdigest()
    # Relative paths are interpreted against the current job's tmpdir.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    # Serialize concurrent extractions of the same path with a lock file.
    lockfile = open(path + '.lock', 'w')
    fcntl.flock(lockfile, fcntl.LOCK_EX)
    try:
        os.stat(path)
    except OSError:
        os.mkdir(path)
    already_have_it = False
    try:
        if os.readlink(os.path.join(path, '.locator')) == collection_hash:
            already_have_it = True
    except OSError:
        pass

    # NOTE(review): unlike tarball_extract/zipball_extract, already_have_it
    # is computed but never consulted below — extraction always proceeds,
    # relying only on the per-file os.path.exists() check. Confirm whether
    # this is intentional.

    # emulate "rm -f" (i.e., if the file does not exist, we win)
    try:
        os.unlink(os.path.join(path, '.locator'))
    except OSError:
        if os.path.exists(os.path.join(path, '.locator')):
            os.unlink(os.path.join(path, '.locator'))

    files_got = []
    for s in CollectionReader(collection).all_streams():
        stream_name = s.name()
        for f in s.all_files():
            # Extract when no filter list was given, or when the file
            # matches the filter list (by plain or decompressed name)
            # and hasn't been extracted already.
            if (files == [] or
                ((f.name() not in files_got) and
                 (f.name() in files or
                  (decompress and f.decompressed_name() in files)))):
                outname = f.decompressed_name() if decompress else f.name()
                files_got += [outname]
                if os.path.exists(os.path.join(path, stream_name, outname)):
                    continue
                mkdir_dash_p(os.path.dirname(os.path.join(path, stream_name, outname)))
                outfile = open(os.path.join(path, stream_name, outname), 'wb')
                for buf in (f.readall_decompressed() if decompress
                            else f.readall()):
                    outfile.write(buf)
                outfile.close()
    if len(files_got) < len(files):
        raise arvados.errors.AssertionError(
            "Wanted files %s but only got %s from %s" %
            (files, files_got,
             [z.name() for z in CollectionReader(collection).all_files()]))
    # Record the extracted collection hash in the '.locator' symlink.
    os.symlink(collection_hash, os.path.join(path, '.locator'))

    lockfile.close()
    return path
651
@_deprecated('3.0', 'pathlib.Path().mkdir(parents=True, exist_ok=True)')
def mkdir_dash_p(path):
    """Create `path` and any missing parent directories, like `mkdir -p`.

    It is not an error if the directory already exists.
    """
    if os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except FileExistsError:
        # It is not an error if someone else creates the directory
        # between our isdir() and makedirs() calls; only re-raise when
        # the path exists but is not a directory.
        if not os.path.isdir(path):
            raise
664
@_deprecated('3.0', 'arvados.collection.Collection')
def stream_extract(stream, path, files=[], decompress=True):
    """Retrieve a stream from Keep and extract it to a local
    directory.  Return the absolute path where the stream was
    extracted.

    stream -- StreamReader object
    path -- where to extract: absolute, or relative to job tmp
    """
    from arvados import current_job
    # Relative paths are interpreted against the current job's tmpdir.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    # Serialize concurrent extractions of the same path with a lock file.
    lockfile = open(path + '.lock', 'w')
    fcntl.flock(lockfile, fcntl.LOCK_EX)
    try:
        os.stat(path)
    except OSError:
        os.mkdir(path)

    files_got = []
    for f in stream.all_files():
        # Extract when no filter list was given, or when the file matches
        # the filter list (by plain or decompressed name) and hasn't been
        # extracted already.
        if (files == [] or
            ((f.name() not in files_got) and
             (f.name() in files or
              (decompress and f.decompressed_name() in files)))):
            outname = f.decompressed_name() if decompress else f.name()
            files_got += [outname]
            # Replace any pre-existing file of the same name.
            if os.path.exists(os.path.join(path, outname)):
                os.unlink(os.path.join(path, outname))
            mkdir_dash_p(os.path.dirname(os.path.join(path, outname)))
            outfile = open(os.path.join(path, outname), 'wb')
            for buf in (f.readall_decompressed() if decompress
                        else f.readall()):
                outfile.write(buf)
            outfile.close()
    if len(files_got) < len(files):
        raise arvados.errors.AssertionError(
            "Wanted files %s but only got %s from %s" %
            (files, files_got, [z.name() for z in stream.all_files()]))
    lockfile.close()
    return path
706
@_deprecated('3.0', 'os.walk')
def listdir_recursive(dirname, base=None, max_depth=None):
    """listdir_recursive(dirname, base, max_depth)

    Return a list of file and directory names found under dirname.

    If base is not None, prepend "{base}/" to each returned name.

    If max_depth is None, descend into directories and return only the
    names of files found in the directory tree.

    If max_depth is a non-negative integer, stop descending into
    directories at the given depth, and at that point return directory
    names instead.

    If max_depth==0 (and base is None) this is equivalent to
    sorted(os.listdir(dirname)).
    """
    results = []
    for entry in sorted(os.listdir(dirname)):
        entry_path = os.path.join(dirname, entry)
        entry_name = os.path.join(base, entry) if base else entry
        if max_depth != 0 and os.path.isdir(entry_path):
            # Recurse, decrementing the remaining depth (None means
            # unlimited).
            next_depth = max_depth - 1 if max_depth else None
            results.extend(listdir_recursive(
                entry_path, base=entry_name, max_depth=next_depth))
        else:
            results.append(entry_name)
    return results
HEX_RE = re.compile('^[0-9a-fA-F]+$')

Regular expression to match a hexadecimal string (case-insensitive)

CR_UNCOMMITTED = 'Uncommitted'

Constant state value for uncommitted container requests

CR_COMMITTED = 'Committed'

Constant state value for committed container requests

CR_FINAL = 'Final'

Constant state value for finalized container requests

keep_locator_pattern = re.compile('[0-9a-f]{32}\\+[0-9]+(\\+\\S+)*')

Regular expression to match any Keep block locator

signed_locator_pattern = re.compile('[0-9a-f]{32}\\+[0-9]+(\\+\\S+)*\\+A\\S+(\\+\\S+)*')

Regular expression to match any Keep block locator with an access token hint

portable_data_hash_pattern = re.compile('[0-9a-f]{32}\\+[0-9]+')

Regular expression to match any collection portable data hash

manifest_pattern = re.compile('((\\S+)( +[a-f0-9]{32}(\\+[0-9]+)(\\+\\S+)*)+( +[0-9]+:[0-9]+:\\S+)+$)+', re.MULTILINE)

Regular expression to match an Arvados collection manifest text

keep_file_locator_pattern = re.compile('([0-9a-f]{32}\\+[0-9]+)/(.*)')

Regular expression to match a file path from a collection identified by portable data hash

keepuri_pattern = re.compile('keep:([0-9a-f]{32}\\+[0-9]+)/(.*)')

Regular expression to match a keep: URI with a collection identified by portable data hash

uuid_pattern = re.compile('[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')

Regular expression to match any Arvados object UUID

collection_uuid_pattern = re.compile('[a-z0-9]{5}-4zz18-[a-z0-9]{15}')

Regular expression to match any Arvados collection UUID

container_uuid_pattern = re.compile('[a-z0-9]{5}-dz642-[a-z0-9]{15}')

Regular expression to match any Arvados container UUID

group_uuid_pattern = re.compile('[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')

Regular expression to match any Arvados group UUID

user_uuid_pattern = re.compile('[a-z0-9]{5}-tpzed-[a-z0-9]{15}')

Regular expression to match any Arvados user UUID

job_uuid_pattern = re.compile('[a-z0-9]{5}-8i9sb-[a-z0-9]{15}')

Regular expression to match any Arvados job UUID

def is_hex(s: str, *length_args: int) -> bool:
    """Indicate whether a string is a hexadecimal number

    This method returns true if all characters in the string are hexadecimal
    digits. It is case-insensitive.

    You can also pass optional length arguments to check that the string has
    the expected number of digits. If you pass one integer, the string must
    have that length exactly, otherwise the method returns False. If you
    pass two integers, the string's length must fall within that minimum and
    maximum (inclusive), otherwise the method returns False.

    Arguments:

    * s: str --- The string to check

    * length_args: int --- Optional length limit(s) for the string to check
    """
    arg_count = len(length_args)
    if arg_count > 2:
        raise arvados.errors.ArgumentError(
            "is_hex accepts up to 3 arguments ({} given)".format(1 + arg_count))
    if arg_count == 2:
        minimum, maximum = length_args
        length_ok = minimum <= len(s) <= maximum
    elif arg_count == 1:
        length_ok = len(s) == length_args[0]
    else:
        length_ok = True
    return length_ok and HEX_RE.match(s) is not None

Indicate whether a string is a hexadecimal number

This method returns true if all characters in the string are hexadecimal digits. It is case-insensitive.

You can also pass optional length arguments to check that the string has the expected number of digits. If you pass one integer, the string must have that length exactly, otherwise the method returns False. If you pass two integers, the string’s length must fall within that minimum and maximum (inclusive), otherwise the method returns False.

Arguments:

  • s: str — The string to check

  • length_args: int — Optional length limit(s) for the string to check

def keyset_list_all(
        fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
        order_key: str="created_at",
        num_retries: int=0,
        ascending: bool=True,
        **kwargs: Any,
) -> Iterator[Dict[str, Any]]:
    """Iterate all Arvados resources from an API list call

    This method takes a method that represents an Arvados API list call, and
    iterates the objects returned by the API server. It can make multiple API
    calls to retrieve and iterate all objects available from the API server.

    Arguments:

    * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] --- A
      function that wraps an Arvados API method that returns a list of
      objects. If you have an Arvados API client named `arv`, examples
      include `arv.collections().list` and `arv.groups().contents`. Note
      that you should pass the function *without* calling it.

    * order_key: str --- The name of the primary object field that objects
      should be sorted by. This name is used to build an `order` argument
      for `fn`. Default `'created_at'`.

    * num_retries: int --- This argument is passed through to
      `arvados.api_resources.ArvadosAPIRequest.execute` for each API call. See
      that method's docstring for details. Default 0 (meaning API calls will
      use the `num_retries` value set when the Arvados API client was
      constructed).

    * ascending: bool --- Used to build an `order` argument for `fn`. If True,
      all fields will be sorted in `'asc'` (ascending) order. Otherwise, all
      fields will be sorted in `'desc'` (descending) order.

    Additional keyword arguments will be passed directly to `fn` for each API
    call. Note that this function sets `count`, `limit`, and `order` as part of
    its work.
    """
    pagesize = 1000
    kwargs["limit"] = pagesize
    # count='none' skips the expensive items_available tally; paging below
    # detects the end of the list by receiving an empty page instead.
    kwargs["count"] = 'none'
    asc = "asc" if ascending else "desc"
    # Secondary sort on uuid makes the ordering total, so the keyset filters
    # built below can resume deterministically from the last item seen.
    kwargs["order"] = ["%s %s" % (order_key, asc), "uuid %s" % asc]
    other_filters = kwargs.get("filters", [])

    try:
        select = set(kwargs['select'])
    except KeyError:
        pass
    else:
        # Paging reads order_key and uuid off every item, so make sure they
        # are selected even when the caller narrowed `select`.
        select.add(order_key)
        select.add('uuid')
        kwargs['select'] = list(select)

    nextpage = []
    tot = 0
    expect_full_page = True
    seen_prevpage = set()
    seen_thispage = set()
    lastitem = None
    prev_page_all_same_order_key = False

    while True:
        kwargs["filters"] = nextpage+other_filters
        items = fn(**kwargs).execute(num_retries=num_retries)

        if len(items["items"]) == 0:
            if prev_page_all_same_order_key:
                # We were paging by uuid through a run of items that all
                # share one order_key value; that run is exhausted, so skip
                # past it and resume paging on order_key.
                nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
                prev_page_all_same_order_key = False
                continue
            else:
                return

        seen_prevpage = seen_thispage
        seen_thispage = set()

        for i in items["items"]:
            # In cases where there's more than one record with the
            # same order key, the result could include records we
            # already saw in the last page.  Skip them.
            if i["uuid"] in seen_prevpage:
                continue
            seen_thispage.add(i["uuid"])
            yield i

        firstitem = items["items"][0]
        lastitem = items["items"][-1]

        if firstitem[order_key] == lastitem[order_key]:
            # Got a page where every item has the same order key.
            # Switch to using uuid for paging.
            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">" if ascending else "<", lastitem["uuid"]]]
            prev_page_all_same_order_key = True
        else:
            # Start from the last order key seen, but skip the last
            # known uuid to avoid retrieving the same row twice.  If
            # there are multiple rows with the same order key it is
            # still likely we'll end up retrieving duplicate rows.
            # That's handled by tracking the "seen" rows for each page
            # so they can be skipped if they show up on the next page.
            nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
            prev_page_all_same_order_key = False

Iterate all Arvados resources from an API list call

This method takes a method that represents an Arvados API list call, and iterates the objects returned by the API server. It can make multiple API calls to retrieve and iterate all objects available from the API server.

Arguments:

  • fn: Callable[…, arvados.api_resources.ArvadosAPIRequest] — A function that wraps an Arvados API method that returns a list of objects. If you have an Arvados API client named arv, examples include arv.collections().list and arv.groups().contents. Note that you should pass the function without calling it.

  • order_key: str — The name of the primary object field that objects should be sorted by. This name is used to build an order argument for fn. Default 'created_at'.

  • num_retries: int — This argument is passed through to arvados.api_resources.ArvadosAPIRequest.execute for each API call. See that method’s docstring for details. Default 0 (meaning API calls will use the num_retries value set when the Arvados API client was constructed).

  • ascending: bool — Used to build an order argument for fn. If True, all fields will be sorted in 'asc' (ascending) order. Otherwise, all fields will be sorted in 'desc' (descending) order.

Additional keyword arguments will be passed directly to fn for each API call. Note that this function sets count, limit, and order as part of its work.

def ca_certs_path(fallback: T=httplib2.CA_CERTS) -> Union[str, T]:
    """Return the path of the best available source of CA certificates

    This function checks various known paths that provide trusted CA
    certificates, and returns the first one that exists. It checks:

    * the path in the `SSL_CERT_FILE` environment variable (used by OpenSSL)
    * `/etc/arvados/ca-certificates.crt`, respected by all Arvados software
    * `/etc/ssl/certs/ca-certificates.crt`, the default store on Debian-based
      distributions
    * `/etc/pki/tls/certs/ca-bundle.crt`, the default store on Red Hat-based
      distributions

    If none of these paths exist, this function returns the value of `fallback`.

    Arguments:

    * fallback: T --- The value to return if none of the known paths exist.
      The default value is the certificate store of Mozilla's trusted CAs
      included with the Python [certifi][] package.

    [certifi]: https://pypi.org/project/certifi/
    """
    candidates = (
        # SSL_CERT_FILE and SSL_CERT_DIR are openssl overrides - note
        # that httplib2 itself also supports HTTPLIB2_CA_CERTS.
        os.environ.get('SSL_CERT_FILE'),
        # Arvados specific:
        '/etc/arvados/ca-certificates.crt',
        # Debian:
        '/etc/ssl/certs/ca-certificates.crt',
        # Red Hat:
        '/etc/pki/tls/certs/ca-bundle.crt',
    )
    for candidate in candidates:
        if candidate and os.path.exists(candidate):
            return candidate
    return fallback

Return the path of the best available source of CA certificates

This function checks various known paths that provide trusted CA certificates, and returns the first one that exists. It checks:

  • the path in the SSL_CERT_FILE environment variable (used by OpenSSL)
  • /etc/arvados/ca-certificates.crt, respected by all Arvados software
  • /etc/ssl/certs/ca-certificates.crt, the default store on Debian-based distributions
  • /etc/pki/tls/certs/ca-bundle.crt, the default store on Red Hat-based distributions

If none of these paths exist, this function returns the value of fallback.

Arguments:

  • fallback: T — The value to return if none of the known paths exist. The default value is the certificate store of Mozilla’s trusted CAs included with the Python certifi package.
def new_request_id() -> str:
    """Return a random request ID

    This function generates and returns a random string suitable for use as a
    `X-Request-Id` header value in the Arvados API.
    """
    # 2**104 > 36**20 > 2**103, so 104 random bits are enough to fill
    # 20 base-36 digits with negligible bias toward low digits.
    n = random.getrandbits(104)
    digits = []
    for _ in range(20):
        n, d = divmod(n, 36)
        if d < 10:
            digits.append(chr(d + ord('0')))
        else:
            digits.append(chr(d + ord('a') - 10))
    return "req-" + ''.join(digits)

Return a random request ID

This function generates and returns a random string suitable for use as a X-Request-Id header value in the Arvados API.

def get_config_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's configuration, with caching

    This function gets and returns the Arvados configuration from the API
    server. It caches the result on the client object and reuses it on any
    future calls.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster configuration.
    """
    # Old API server versions have no config export endpoint.
    if not svc._rootDesc.get('resources').get('configs', False):
        return {}
    try:
        return svc._cached_config
    except AttributeError:
        svc._cached_config = svc.configs().get().execute()
        return svc._cached_config

Return an Arvados cluster’s configuration, with caching

This function gets and returns the Arvados configuration from the API server. It caches the result on the client object and reuses it on any future calls.

Arguments:

  • svc: arvados.api_resources.ArvadosAPIClient — The Arvados API client object to use to retrieve and cache the Arvados cluster configuration.

def get_vocabulary_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
    """Return an Arvados cluster's vocabulary, with caching

    This function gets and returns the Arvados vocabulary from the API
    server. It caches the result on the client object and reuses it on any
    future calls.

    .. HINT:: Low-level method
       This is a relatively low-level wrapper around the Arvados API. Most
       users will prefer to use `arvados.vocabulary.load_vocabulary`.

    Arguments:

    * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
      object to use to retrieve and cache the Arvados cluster vocabulary.
    """
    # Old API server versions have no vocabulary export endpoint.
    if not svc._rootDesc.get('resources').get('vocabularies', False):
        return {}
    try:
        return svc._cached_vocabulary
    except AttributeError:
        svc._cached_vocabulary = svc.vocabularies().get().execute()
        return svc._cached_vocabulary

Return an Arvados cluster’s vocabulary, with caching

This function gets and returns the Arvados vocabulary from the API server. It caches the result on the client object and reuses it on any future calls.

Arguments:

  • svc: arvados.api_resources.ArvadosAPIClient — The Arvados API client object to use to retrieve and cache the Arvados cluster vocabulary.

def trim_name(collectionname: str) -> str:
    """Limit the length of a name to fit within Arvados API limits

    This function ensures that a string is short enough to use as an object
    name in the Arvados API, leaving room for text that may be added by the
    `ensure_unique_name` argument. If the source name is short enough, it is
    returned unchanged. Otherwise, this function returns a string with excess
    characters removed from the middle of the source string and replaced with
    an ellipsis.

    Arguments:

    * collectionname: str --- The desired source name
    """
    # 254 is the API name-length limit; reserve 28 characters for a suffix
    # that ensure_unique_name may append.
    limit = 254 - 28
    if len(collectionname) <= limit:
        return collectionname
    excess = len(collectionname) - limit
    half = limit // 2
    return collectionname[:half] + "…" + collectionname[half + excess:]

Limit the length of a name to fit within Arvados API limits

This function ensures that a string is short enough to use as an object name in the Arvados API, leaving room for text that may be added by the ensure_unique_name argument. If the source name is short enough, it is returned unchanged. Otherwise, this function returns a string with excess characters removed from the middle of the source string and replaced with an ellipsis.

Arguments:

  • collectionname: str — The desired source name
@_deprecated('3.0', 'arvados.util.keyset_list_all')
def list_all(fn, num_retries=0, **kwargs):
    """Fetch every item from a paginated Arvados list call into one list."""
    # Default limit to (effectively) the API server's MAX_LIMIT.
    kwargs.setdefault('limit', sys.maxsize)
    results = []
    offset = 0
    available = sys.maxsize
    while len(results) < available:
        page = fn(offset=offset, **kwargs).execute(num_retries=num_retries)
        results.extend(page['items'])
        available = page['items_available']
        offset = page['offset'] + len(page['items'])
    return results
@_deprecated('3.0')
def clear_tmpdir(path=None):
    """
    Ensure the given directory (or TASK_TMPDIR if none given)
    exists and is empty.
    """
    from arvados import current_task
    target = current_task().tmpdir if path is None else path
    if os.path.exists(target):
        # Remove the whole tree, then recreate it empty below.
        proc = subprocess.Popen(['rm', '-rf', target])
        _stdout, stderr = proc.communicate(None)
        if proc.returncode != 0:
            raise Exception('rm -rf %s: %s' % (target, stderr))
    os.mkdir(target)

Ensure the given directory (or TASK_TMPDIR if none given) exists and is empty.

@_deprecated('3.0', 'subprocess.run')
def run_command(execargs, **kwargs):
    """Run a subprocess and return (stdout, stderr); raise on nonzero exit."""
    defaults = {
        'stdin': subprocess.PIPE,
        'stdout': subprocess.PIPE,
        'stderr': sys.stderr,
        'close_fds': True,
        'shell': False,
    }
    for option, value in defaults.items():
        kwargs.setdefault(option, value)
    proc = subprocess.Popen(execargs, **kwargs)
    out, err = proc.communicate(None)
    if proc.returncode != 0:
        raise arvados.errors.CommandFailedError(
            "run_command %s exit %d:\n%s" %
            (execargs, proc.returncode, err))
    return out, err
@_deprecated('3.0')
def git_checkout(url, version, path):
    """Clone `url` into `path` (if not already present) and check out `version`."""
    from arvados import current_job
    # Relative paths are resolved against the job's temporary directory.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    if not os.path.exists(path):
        run_command(["git", "clone", url, path], cwd=os.path.dirname(path))
    run_command(["git", "checkout", version], cwd=path)
    return path
@_deprecated('3.0')
def tar_extractor(path, decompress_flag):
    """Return a Popen running `tar -x` into `path`, reading archive data from stdin."""
    command = ["tar", "-C", path, "-x%sf" % decompress_flag, "-"]
    return subprocess.Popen(
        command,
        stdout=None,
        stdin=subprocess.PIPE,
        stderr=sys.stderr,
        shell=False,
        close_fds=True,
    )
@_deprecated('3.0', 'arvados.collection.Collection.open and the tarfile module')
def tarball_extract(tarball, path):
    """Retrieve a tarball from Keep and extract it to a local
    directory.  Return the absolute path where the tarball was
    extracted. If the top level of the tarball contained just one
    file or directory, return the absolute path of that single
    item.

    tarball -- collection locator
    path -- where to extract the tarball: absolute, or relative to job tmp
    """
    from arvados import current_job
    from arvados.collection import CollectionReader
    # Relative paths are resolved against the job's temporary directory.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    # Serialize concurrent extractions of the same path with a lock file.
    lockfile = open(path + '.lock', 'w')
    fcntl.flock(lockfile, fcntl.LOCK_EX)
    try:
        os.stat(path)
    except OSError:
        os.mkdir(path)
    already_have_it = False
    try:
        # The '.locator' symlink records which tarball was last extracted
        # here; if it matches, skip re-extracting.
        if os.readlink(os.path.join(path, '.locator')) == tarball:
            already_have_it = True
    except OSError:
        pass
    if not already_have_it:

        # emulate "rm -f" (i.e., if the file does not exist, we win)
        try:
            os.unlink(os.path.join(path, '.locator'))
        except OSError:
            if os.path.exists(os.path.join(path, '.locator')):
                os.unlink(os.path.join(path, '.locator'))

        for f in CollectionReader(tarball).all_files():
            # Choose tar's decompression flag from the file extension.
            f_name = f.name()
            if f_name.endswith(('.tbz', '.tar.bz2')):
                p = tar_extractor(path, 'j')
            elif f_name.endswith(('.tgz', '.tar.gz')):
                p = tar_extractor(path, 'z')
            elif f_name.endswith('.tar'):
                p = tar_extractor(path, '')
            else:
                raise arvados.errors.AssertionError(
                    "tarball_extract cannot handle filename %s" % f.name())
            # Stream the archive into tar's stdin in 1 MiB chunks.
            while True:
                buf = f.read(2**20)
                if len(buf) == 0:
                    break
                p.stdin.write(buf)
            p.stdin.close()
            p.wait()
            if p.returncode != 0:
                lockfile.close()
                raise arvados.errors.CommandFailedError(
                    "tar exited %d" % p.returncode)
        os.symlink(tarball, os.path.join(path, '.locator'))
    tld_extracts = [f for f in os.listdir(path) if f != '.locator']
    lockfile.close()
    # If the tarball held a single top-level entry, return that entry's path.
    if len(tld_extracts) == 1:
        return os.path.join(path, tld_extracts[0])
    return path

Retrieve a tarball from Keep and extract it to a local directory. Return the absolute path where the tarball was extracted. If the top level of the tarball contained just one file or directory, return the absolute path of that single item.

tarball — collection locator

path — where to extract the tarball: absolute, or relative to job tmp

@_deprecated('3.0', 'arvados.collection.Collection.open and the zipfile module')
def zipball_extract(zipball, path):
    """Retrieve a zip archive from Keep and extract it to a local
    directory.  Return the absolute path where the archive was
    extracted. If the top level of the archive contained just one
    file or directory, return the absolute path of that single
    item.

    zipball -- collection locator
    path -- where to extract the archive: absolute, or relative to job tmp
    """
    from arvados import current_job
    from arvados.collection import CollectionReader
    # Relative paths are resolved against the job's temporary directory.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    # Serialize concurrent extractions of the same path with a lock file.
    lockfile = open(path + '.lock', 'w')
    fcntl.flock(lockfile, fcntl.LOCK_EX)
    try:
        os.stat(path)
    except OSError:
        os.mkdir(path)
    already_have_it = False
    try:
        # The '.locator' symlink records which archive was last extracted
        # here; if it matches, skip re-extracting.
        if os.readlink(os.path.join(path, '.locator')) == zipball:
            already_have_it = True
    except OSError:
        pass
    if not already_have_it:

        # emulate "rm -f" (i.e., if the file does not exist, we win)
        try:
            os.unlink(os.path.join(path, '.locator'))
        except OSError:
            if os.path.exists(os.path.join(path, '.locator')):
                os.unlink(os.path.join(path, '.locator'))

        for f in CollectionReader(zipball).all_files():
            if not f.name().endswith('.zip'):
                raise arvados.errors.NotImplementedError(
                    "zipball_extract cannot handle filename %s" % f.name())
            # Copy the zip data out of Keep to a local file in 1 MiB chunks
            # before invoking unzip on it.
            zip_filename = os.path.join(path, os.path.basename(f.name()))
            zip_file = open(zip_filename, 'wb')
            while True:
                buf = f.read(2**20)
                if len(buf) == 0:
                    break
                zip_file.write(buf)
            zip_file.close()

            p = subprocess.Popen(["unzip",
                                  "-q", "-o",
                                  "-d", path,
                                  zip_filename],
                                 stdout=None,
                                 stdin=None, stderr=sys.stderr,
                                 shell=False, close_fds=True)
            p.wait()
            if p.returncode != 0:
                lockfile.close()
                raise arvados.errors.CommandFailedError(
                    "unzip exited %d" % p.returncode)
            os.unlink(zip_filename)
        os.symlink(zipball, os.path.join(path, '.locator'))
    tld_extracts = [f for f in os.listdir(path) if f != '.locator']
    lockfile.close()
    # If the archive held a single top-level entry, return that entry's path.
    if len(tld_extracts) == 1:
        return os.path.join(path, tld_extracts[0])
    return path

Retrieve a zip archive from Keep and extract it to a local directory. Return the absolute path where the archive was extracted. If the top level of the archive contained just one file or directory, return the absolute path of that single item.

zipball — collection locator

path — where to extract the archive: absolute, or relative to job tmp

@_deprecated('3.0', 'arvados.collection.Collection')
def collection_extract(collection, path, files=[], decompress=True):
    """Retrieve a collection from Keep and extract it to a local
    directory.  Return the absolute path where the collection was
    extracted.

    collection -- collection locator
    path -- where to extract: absolute, or relative to job tmp
    """
    from arvados import current_job
    from arvados.collection import CollectionReader
    # Use the bare hash from a locator-shaped string as the cache key;
    # otherwise fall back to the MD5 of the whole string.
    matches = re.search(r'^([0-9a-f]+)(\+[\w@]+)*$', collection)
    if matches:
        collection_hash = matches.group(1)
    else:
        collection_hash = hashlib.md5(collection).hexdigest()
    # Relative paths are resolved against the job's temporary directory.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    # Serialize concurrent extractions of the same path with a lock file.
    lockfile = open(path + '.lock', 'w')
    fcntl.flock(lockfile, fcntl.LOCK_EX)
    try:
        os.stat(path)
    except OSError:
        os.mkdir(path)
    already_have_it = False
    try:
        # The '.locator' symlink records which collection was last
        # extracted here.
        if os.readlink(os.path.join(path, '.locator')) == collection_hash:
            already_have_it = True
    except OSError:
        pass

    # emulate "rm -f" (i.e., if the file does not exist, we win)
    try:
        os.unlink(os.path.join(path, '.locator'))
    except OSError:
        if os.path.exists(os.path.join(path, '.locator')):
            os.unlink(os.path.join(path, '.locator'))

    files_got = []
    for s in CollectionReader(collection).all_streams():
        stream_name = s.name()
        for f in s.all_files():
            # Extract everything when files == []; otherwise only the
            # requested names (also matching decompressed names when
            # decompress is on), skipping names already extracted.
            if (files == [] or
                ((f.name() not in files_got) and
                 (f.name() in files or
                  (decompress and f.decompressed_name() in files)))):
                outname = f.decompressed_name() if decompress else f.name()
                files_got += [outname]
                if os.path.exists(os.path.join(path, stream_name, outname)):
                    continue
                mkdir_dash_p(os.path.dirname(os.path.join(path, stream_name, outname)))
                outfile = open(os.path.join(path, stream_name, outname), 'wb')
                for buf in (f.readall_decompressed() if decompress
                            else f.readall()):
                    outfile.write(buf)
                outfile.close()
    if len(files_got) < len(files):
        raise arvados.errors.AssertionError(
            "Wanted files %s but only got %s from %s" %
            (files, files_got,
             [z.name() for z in CollectionReader(collection).all_files()]))
    os.symlink(collection_hash, os.path.join(path, '.locator'))

    lockfile.close()
    return path

Retrieve a collection from Keep and extract it to a local directory. Return the absolute path where the collection was extracted.

collection — collection locator

path — where to extract: absolute, or relative to job tmp

@_deprecated('3.0', 'pathlib.Path().mkdir(parents=True, exist_ok=True)')
def mkdir_dash_p(path):
    """Create `path` and any missing parent directories, like `mkdir -p`."""
    if os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError as e:
        # It is not an error if someone else creates the directory
        # between our isdir() check and the makedirs() call.
        if not (e.errno == errno.EEXIST and os.path.isdir(path)):
            raise
@_deprecated('3.0', 'arvados.collection.Collection')
def stream_extract(stream, path, files=[], decompress=True):
    """Retrieve a stream from Keep and extract it to a local
    directory.  Return the absolute path where the stream was
    extracted.

    stream -- StreamReader object
    path -- where to extract: absolute, or relative to job tmp
    """
    from arvados import current_job
    # Relative paths are resolved against the job's temporary directory.
    if not re.search('^/', path):
        path = os.path.join(current_job().tmpdir, path)
    # Serialize concurrent extractions of the same path with a lock file.
    lockfile = open(path + '.lock', 'w')
    fcntl.flock(lockfile, fcntl.LOCK_EX)
    try:
        os.stat(path)
    except OSError:
        os.mkdir(path)

    files_got = []
    for f in stream.all_files():
        # Extract everything when files == []; otherwise only the requested
        # names (also matching decompressed names when decompress is on),
        # skipping names already extracted.
        if (files == [] or
            ((f.name() not in files_got) and
             (f.name() in files or
              (decompress and f.decompressed_name() in files)))):
            outname = f.decompressed_name() if decompress else f.name()
            files_got += [outname]
            if os.path.exists(os.path.join(path, outname)):
                os.unlink(os.path.join(path, outname))
            mkdir_dash_p(os.path.dirname(os.path.join(path, outname)))
            outfile = open(os.path.join(path, outname), 'wb')
            for buf in (f.readall_decompressed() if decompress
                        else f.readall()):
                outfile.write(buf)
            outfile.close()
    if len(files_got) < len(files):
        raise arvados.errors.AssertionError(
            "Wanted files %s but only got %s from %s" %
            (files, files_got, [z.name() for z in stream.all_files()]))
    lockfile.close()
    return path

Retrieve a stream from Keep and extract it to a local directory. Return the absolute path where the stream was extracted.

stream — StreamReader object

path — where to extract: absolute, or relative to job tmp

@_deprecated('3.0', 'os.walk')
def listdir_recursive(dirname, base=None, max_depth=None):
    """listdir_recursive(dirname, base, max_depth)

    Return a list of file and directory names found under dirname.

    If base is not None, prepend "{base}/" to each returned name.

    If max_depth is None, descend into directories and return only the
    names of files found in the directory tree.

    If max_depth is a non-negative integer, stop descending into
    directories at the given depth, and at that point return directory
    names instead.

    If max_depth==0 (and base is None) this is equivalent to
    sorted(os.listdir(dirname)).
    """
    found = []
    for entry in sorted(os.listdir(dirname)):
        full_path = os.path.join(dirname, entry)
        rel_name = os.path.join(base, entry) if base else entry
        if os.path.isdir(full_path) and max_depth != 0:
            next_depth = max_depth - 1 if max_depth else None
            found.extend(listdir_recursive(
                full_path, base=rel_name, max_depth=next_depth))
        else:
            found.append(rel_name)
    return found

listdir_recursive(dirname, base, max_depth)

Return a list of file and directory names found under dirname.

If base is not None, prepend “{base}/” to each returned name.

If max_depth is None, descend into directories and return only the names of files found in the directory tree.

If max_depth is a non-negative integer, stop descending into directories at the given depth, and at that point return directory names instead.

If max_depth==0 (and base is None) this is equivalent to sorted(os.listdir(dirname)).