Configuration reference

The Arvados configuration is stored at /etc/arvados/config.yml

# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# Do not use this file for site configuration. Create
# /etc/arvados/config.yml instead.
#
# The order of precedence (highest to lowest):
# 1. Legacy component-specific config files (deprecated)
# 2. /etc/arvados/config.yml
# 3. config.default.yml

Clusters:
  xxxxx:
    # Token used internally by Arvados components to authenticate to
    # one another. Use a string of at least 50 random alphanumerics.
    SystemRootToken: ""

    # Token to be included in all healthcheck requests. Disabled by default.
    # Server expects request header of the format "Authorization: Bearer xxx"
    ManagementToken: ""

    Services:

      # Each of the service sections below specifies InternalURLs
      # (each with optional ListenURL) and ExternalURL.
      #
      # InternalURLs specify how other Arvados service processes will
      # connect to the service. Typically these use internal hostnames
      # and high port numbers. Example:
      #
      # InternalURLs:
      #   "http://host1.internal.example:12345": {}
      #   "http://host2.internal.example:12345": {}
      #
      # ListenURL specifies the address and port the service process's
      # HTTP server should listen on, if different from the
      # InternalURL itself. Example, using an intermediate TLS proxy:
      #
      # InternalURLs:
      #   "https://host1.internal.example":
      #     ListenURL: "http://10.0.0.7:12345"
      #
      # When there are multiple InternalURLs configured, the service
      # process will try listening on each InternalURL (using
      # ListenURL if provided) until one works. If you use a ListenURL
      # like "0.0.0.0" which can be bound on any machine, use an
      # environment variable
      # ARVADOS_SERVICE_INTERNAL_URL=http://host1.internal.example to
      # control which entry to use.
      #
      # ExternalURL specifies how applications/clients will connect to
      # the service, regardless of whether they are inside or outside
      # the cluster. Example:
      #
      # ExternalURL: "https://keep.zzzzz.example.com/"
      #
      # To avoid routing internal traffic through external networks,
      # use split-horizon DNS for ExternalURL host names: inside the
      # cluster's private network "host.zzzzz.example.com" resolves to
      # the host's private IP address, while outside the cluster
      # "host.zzzzz.example.com" resolves to the host's public IP
      # address (or its external gateway or load balancer).

      RailsAPI:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      Controller:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      Websocket:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      Keepbalance:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      GitHTTP:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      GitSSH:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      DispatchCloud:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      DispatchLSF:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      DispatchSLURM:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      Keepproxy:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      WebDAV:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        # Base URL for Workbench inline preview.  If blank, use
        # WebDAVDownload instead, and disable inline preview.
        # If both are empty, downloading collections from workbench
        # will be impossible.
        #
        # It is important to properly configure the download service
        # to mitigate cross-site-scripting (XSS) attacks.  An HTML page
        # can be stored in a collection.  If an attacker causes a victim
        # to visit that page through Workbench, it will be rendered by
        # the browser.  If all collections are served at the same
        # domain, the browser will consider collections as coming from
        # the same origin and having access to the same browsing data,
        # enabling malicious Javascript on that page to access Arvados
        # on behalf of the victim.
        #
        # This is mitigated by having separate domains for each
        # collection, or limiting preview to circumstances where the
        # collection is not accessed with the user's regular
        # full-access token.
        #
        # Serve preview links using uuid or pdh in subdomain
        # (requires wildcard DNS and TLS certificate)
        #   https://*.collections.uuid_prefix.arvadosapi.com
        #
        # Serve preview links using uuid or pdh in main domain
        # (requires wildcard DNS and TLS certificate)
        #   https://*--collections.uuid_prefix.arvadosapi.com
        #
        # Serve preview links by setting uuid or pdh in the path.
        # This configuration only allows previews of public data or
        # collection-sharing links, because these use the anonymous
        # user token or the token is already embedded in the URL.
        # Other data must be handled as downloads via WebDAVDownload:
        #   https://collections.uuid_prefix.arvadosapi.com
        #
        ExternalURL: ""

      WebDAVDownload:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        # Base URL for download links. If blank, serve links to WebDAV
        # with disposition=attachment query param.  Unlike preview links,
        # browsers do not render attachments, so there is no risk of XSS.
        #
        # If WebDAVDownload is blank, and WebDAV uses a
        # single-origin form, then Workbench will show an error page.
        #
        # Serve download links by setting uuid or pdh in the path:
        #   https://download.uuid_prefix.arvadosapi.com
        #
        ExternalURL: ""

      Keepstore:
        InternalURLs:
          SAMPLE:
            ListenURL: ""
            # Rendezvous is normally empty/omitted. When changing the
            # URL of a Keepstore service, Rendezvous should be set to
            # the old URL (with trailing slash omitted) to preserve
            # rendezvous ordering.
            Rendezvous: ""
        ExternalURL: ""
      Composer:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      WebShell:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        # ShellInABox service endpoint URL for a given VM.  If empty, do not
        # offer web shell logins.
        #
        # E.g., using a path-based proxy server to forward connections to shell hosts:
        # https://webshell.uuid_prefix.arvadosapi.com
        #
        # E.g., using a name-based proxy server to forward connections to shell hosts:
        # https://*.webshell.uuid_prefix.arvadosapi.com
        ExternalURL: ""
      Workbench1:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      Workbench2:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""
      Health:
        InternalURLs: {SAMPLE: {ListenURL: ""}}
        ExternalURL: ""

    PostgreSQL:
      # max concurrent connections per arvados server daemon
      ConnectionPool: 32
      Connection:
        # All parameters here are passed to the PG client library in a connection string;
        # see https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
        host: ""
        port: ""
        user: ""
        password: ""
        dbname: ""
        SAMPLE: ""
    API:
      # Limit on how long a client token created by a regular user can be
      # valid. This is also used as the default expiration policy when no
      # expiration date is specified.
      # Default value zero means token expirations don't get clamped and no
      # default expiration is set.
      MaxTokenLifetime: 0s

      # Maximum size (in bytes) allowed for a single API request.  This
      # limit is published in the discovery document for use by clients.
      # Note: You must separately configure the upstream web server or
      # proxy to actually enforce the desired maximum request size on the
      # server side.
      MaxRequestSize: 134217728

      # Limit the number of bytes read from the database during an index
      # request (by retrieving and returning fewer rows than would
      # normally be returned in a single response).
      # Note 1: This setting never reduces the number of returned rows to
      # zero, no matter how big the first data row is.
      # Note 2: Currently, this is only checked against a specific set of
      # columns that tend to get large (collections.manifest_text,
      # containers.mounts, workflows.definition). Other fields (e.g.,
      # "properties" hashes) are not counted against this limit.
      MaxIndexDatabaseRead: 134217728

      # Maximum number of items to return when responding to APIs that
      # can return partial result sets using limit and offset parameters
      # (e.g., *.index, groups.contents). If a request specifies a "limit"
      # parameter higher than this value, this value is used instead.
      MaxItemsPerResponse: 1000

      # Maximum number of requests to process concurrently
      # in a single service process, or 0 for no limit.
      #
      # Note this applies to all Arvados services (controller, webdav,
      # websockets, etc.). Concurrency in the controller service is
      # also effectively limited by MaxConcurrentRailsRequests (see
      # below) because most controller requests proxy through to the
      # RailsAPI service.
      #
      # HTTP proxies and load balancers downstream of Arvados services
      # should be configured to allow at least {MaxConcurrentRequests +
      # MaxQueuedRequests + MaxGatewayTunnels} concurrent requests.
      MaxConcurrentRequests: 64

      # Maximum number of requests to process concurrently
      # in a single RailsAPI service process, or 0 for no limit.
      MaxConcurrentRailsRequests: 8

      # Maximum number of incoming requests to hold in a priority
      # queue waiting for one of the MaxConcurrentRequests slots to be
      # free. When the queue is longer than this, respond 503 to the
      # lowest priority request.
      #
      # If MaxQueuedRequests is 0, respond 503 immediately to
      # additional requests while at the MaxConcurrentRequests limit.
      MaxQueuedRequests: 128

      # Maximum time a "lock container" request is allowed to wait in
      # the incoming request queue before returning 503.
      MaxQueueTimeForLockRequests: 2s

      # Maximum number of active gateway tunnel connections. One slot
      # is consumed by each "container shell" connection. If using an
      # HPC dispatcher (LSF or Slurm), one slot is consumed by each
      # running container.  These do not count toward
      # MaxConcurrentRequests.
      MaxGatewayTunnels: 1000

      # Fraction of MaxConcurrentRequests that can be "log create"
      # messages at any given time.  This is to prevent logging
      # updates from crowding out more important requests.
      LogCreateRequestFraction: 0.50

      # Maximum number of 64MiB memory buffers per Keepstore server process, or
      # 0 for no limit. When this limit is reached, up to
      # (MaxConcurrentRequests - MaxKeepBlobBuffers) HTTP requests requiring
      # buffers (like GET and PUT) will wait for buffer space to be released.
      # Any HTTP requests beyond MaxConcurrentRequests will receive an
      # immediate 503 response.
      #
      # MaxKeepBlobBuffers should be set such that (MaxKeepBlobBuffers * 64MiB
      # * 1.1) fits comfortably in memory. On a host dedicated to running
      # Keepstore, divide total memory by 88MiB to suggest a suitable value.
      # For example, if grep MemTotal /proc/meminfo reports MemTotal: 7125440
      # kB, compute 7125440 / (88 * 1024)=79 and set MaxKeepBlobBuffers: 79
      MaxKeepBlobBuffers: 128

      # API methods to disable. Disabled methods are not listed in the
      # discovery document, and respond 404 to all requests.
      # Example: {"jobs.create":{}, "pipeline_instances.create": {}}
      DisabledAPIs: {}

      # Interval (seconds) between asynchronous permission view updates. Any
      # permission-updating API called with the 'async' parameter schedules an
      # update on the permission view in the future, if not already scheduled.
      AsyncPermissionsUpdateInterval: 20s

      # Maximum number of concurrent outgoing requests to make while
      # serving a single incoming multi-cluster (federated) request.
      MaxRequestAmplification: 4

      # Maximum wall clock time to spend handling an incoming request.
      RequestTimeout: 5m

      # Websocket will send a periodic empty event after 'SendTimeout'
      # if there is no other activity to maintain the connection /
      # detect dropped connections.
      SendTimeout: 60s

      WebsocketClientEventQueue: 64
      WebsocketServerEventQueue: 4

      # Timeout on requests to internal Keep services.
      KeepServiceRequestTimeout: 15s

      # Vocabulary file path, local to the node running the controller.
      # This JSON file should contain the description of what's allowed
      # as object's metadata. Its format is described at:
      # https://doc.arvados.org/admin/metadata-vocabulary.html
      VocabularyPath: ""

      # If true, a project must have a non-empty description field in
      # order to be frozen.
      FreezeProjectRequiresDescription: false

      # Project properties that must have non-empty values in order to
      # freeze a project. Example: "property_name": {}
      FreezeProjectRequiresProperties:
        SAMPLE: {}

      # If true, only an admin user can un-freeze a project. If false,
      # any user with "manage" permission can un-freeze.
      UnfreezeProjectRequiresAdmin: false

      # (Experimental) Use row-level locking on update API calls.
      LockBeforeUpdate: false

    Users:
      # Config parameters to automatically set up new users.  If enabled,
      # these users will be able to self-activate.  Enable this if you want
      # to run an open instance where anyone can create an account and use
      # the system without requiring manual approval.
      #
      # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
      # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
      AutoSetupNewUsers: false
      AutoSetupNewUsersWithVmUUID: ""
      AutoSetupNewUsersWithRepository: false
      AutoSetupUsernameBlacklist:
        arvados: {}
        git: {}
        gitolite: {}
        gitolite-admin: {}
        root: {}
        syslog: {}
        SAMPLE: {}

      # When NewUsersAreActive is set to true, new users will be active
      # immediately.  This skips the "self-activate" step which enforces
      # user agreements.  Should only be enabled for development.
      NewUsersAreActive: false

      # Newly activated users (whether set up by an admin or via
      # AutoSetupNewUsers) immediately become visible to other active
      # users.
      #
      # On a multi-tenant cluster, where the intent is for users to be
      # invisible to one another unless they have been added to the
      # same group(s) via Workbench admin interface, change this to
      # false.
      ActivatedUsersAreVisibleToOthers: true

      # If a user creates an account with this email address, they
      # will be automatically set to admin.
      AutoAdminUserWithEmail: ""

      # If AutoAdminFirstUser is set to true, the first user to log in when no
      # other admin users exist will automatically become an admin user.
      AutoAdminFirstUser: false

      # Recipient for notification email sent out when a user sets a
      # profile on their account.
      UserProfileNotificationAddress: ""

      # When sending a NewUser, NewInactiveUser, or UserProfile
      # notification, this is the 'From' address to use
      AdminNotifierEmailFrom: arvados@example.com

      # Prefix for email subjects for NewUser and NewInactiveUser emails
      EmailSubjectPrefix: "[ARVADOS] "

      # When sending a welcome email to the user, the 'From' address to use
      UserNotifierEmailFrom: arvados@example.com

      # The welcome email sent to new users will be blind copied to
      # these addresses.
      UserNotifierEmailBcc:
        SAMPLE: {}

      # Recipients for notification email sent out when a user account
      # is created and already set up to be able to log in
      NewUserNotificationRecipients:
        SAMPLE: {}

      # Recipients for notification email sent out when a user account
      # has been created but the user cannot log in until they are
      # set up by an admin.
      NewInactiveUserNotificationRecipients:
        SAMPLE: {}

      # Set AnonymousUserToken to enable anonymous user access. Populate this
      # field with a random string at least 50 characters long.
      AnonymousUserToken: ""

      # The login provider for a user may supply a primary email
      # address and one or more alternate email addresses.  If a new
      # user has an alternate email address with the domain given
      # here, use the username from the alternate email to generate
      # the user's Arvados username. Otherwise, the username from
      # user's primary email address is used for the Arvados username.
      # Currently implemented for OpenID Connect only.
      PreferDomainForUsername: ""

      # Ruby ERB template used for the email sent out to users when
      # they have been set up.
      UserSetupMailText: |
        <% if not @user.full_name.empty? -%>
        <%= @user.full_name %>,
        <% else -%>
        Hi there,
        <% end -%>

        Your Arvados account has been set up.  You can log in at

        <%= Rails.configuration.Services.Workbench1.ExternalURL %>

        Thanks,
        Your Arvados administrator.

      # If RoleGroupsVisibleToAll is true, all role groups are visible
      # to all active users.
      #
      # If false, users must be granted permission to role groups in
      # order to see them. This is more appropriate for a multi-tenant
      # cluster.
      RoleGroupsVisibleToAll: true

      # If CanCreateRoleGroups is true, regular (non-admin) users can
      # create new role groups.
      #
      # If false, only admins can create new role groups.
      CanCreateRoleGroups: true

      # During each period, a log entry with event_type="activity"
      # will be recorded for each user who is active during that
      # period. The object_uuid attribute will indicate the user's
      # UUID.
      #
      # Multiple log entries for the same user may be generated during
      # a period if there are multiple controller processes or a
      # controller process is restarted.
      #
      # Use 0 to disable activity logging.
      ActivityLoggingPeriod: 24h

      # The SyncUser* options control what system resources are managed by
      # arvados-login-sync on shell nodes. They correspond to:
      # * SyncUserAccounts: The user's Unix account on the shell node
      # * SyncUserGroups: The group memberships of that account
      # * SyncUserSSHKeys: Whether to authorize the user's Arvados SSH keys
      # * SyncUserAPITokens: Whether to set up the user's Arvados API token
      # All default to true.
      SyncUserAccounts: true
      SyncUserGroups: true
      SyncUserSSHKeys: true
      SyncUserAPITokens: true

      # If SyncUserGroups=true, then arvados-login-sync will ensure that all
      # managed accounts are members of the Unix groups listed in
      # SyncRequiredGroups, in addition to any groups listed in their Arvados
      # login permission. The default list includes the "fuse" group so
      # users can use arv-mount. You can require no groups by specifying an
      # empty list (i.e., `SyncRequiredGroups: []`).
      SyncRequiredGroups:
        - fuse

      # SyncIgnoredGroups is a list of group names. arvados-login-sync will
      # never modify these groups. If user login permissions list any groups
      # in SyncIgnoredGroups, they will be ignored. If a user's Unix account
      # belongs to any of these groups, arvados-login-sync will not remove
      # the account from that group. The default is a set of particularly
      # security-sensitive groups across Debian- and Red Hat-based
      # distributions.
      SyncIgnoredGroups:
        - adm
        - disk
        - kmem
        - mem
        - root
        - shadow
        - staff
        - sudo
        - sys
        - utempter
        - utmp
        - wheel

    AuditLogs:
      # Time to keep audit logs, in seconds. (An audit log is a row added
      # to the "logs" table in the PostgreSQL database each time an
      # Arvados object is created, modified, or deleted.)
      #
      # Currently, websocket event notifications rely on audit logs, so
      # this should not be set lower than 300 (5 minutes).
      MaxAge: 336h

      # Maximum number of log rows to delete in a single SQL transaction.
      #
      # If MaxDeleteBatch is 0, log entries will never be
      # deleted by Arvados. Cleanup can be done by an external process
      # without affecting any Arvados system processes, as long as very
      # recent (<5 minutes old) logs are not deleted.
      #
      # 100000 is a reasonable batch size for most sites.
      MaxDeleteBatch: 0

      # Attributes to suppress in events and audit logs.  Notably,
      # specifying {"manifest_text": {}} here typically makes the database
      # smaller and faster.
      #
      # Warning: Using any non-empty value here can have undesirable side
      # effects for any client or component that relies on event logs.
      # Use at your own risk.
      UnloggedAttributes: {}

    SystemLogs:

      # Logging threshold: panic, fatal, error, warn, info, debug, or
      # trace
      LogLevel: info

      # Logging format: json or text
      Format: json

      # Maximum characters of (JSON-encoded) query parameters to include
      # in each request log entry. When params exceed this size, they will
      # be JSON-encoded, truncated to this size, and logged as
      # params_truncated.
      MaxRequestLogParamsSize: 2000

      # In all services except RailsAPI, periodically check whether
      # the incoming HTTP request queue is nearly full (see
      # MaxConcurrentRequests) and, if so, write a snapshot of the
      # request queue to {service}-requests.json in the specified
      # directory.
      #
      # Leave blank to disable.
      RequestQueueDumpDirectory: ""

    Collections:

      # Enable access controls for data stored in Keep. This should
      # always be set to true on a production cluster.
      BlobSigning: true

      # BlobSigningKey is a string of alphanumeric characters used to
      # generate permission signatures for Keep locators. It must be
      # identical to the permission key given to Keep. IMPORTANT: This
      # is a site secret. It should be at least 50 characters.
      #
      # Modifying BlobSigningKey will invalidate all existing
      # signatures, which can cause programs to fail (e.g., arv-put,
      # arv-get, and Crunch jobs).  To avoid errors, rotate keys only
      # when no such processes are running.
      BlobSigningKey: ""

      # Enable garbage collection of unreferenced blobs in Keep.
      BlobTrash: true

      # Time to leave unreferenced blobs in "trashed" state before
      # deleting them, or 0 to skip the "trashed" state entirely and
      # delete unreferenced blobs.
      #
      # If you use any Amazon S3 buckets as storage volumes, this
      # must be at least 24h to avoid occasional data loss.
      BlobTrashLifetime: 336h

      # How often to check for (and delete) trashed blocks whose
      # BlobTrashLifetime has expired.
      BlobTrashCheckInterval: 24h

      # Maximum number of concurrent "trash blob" and "delete trashed
      # blob" operations conducted by a single keepstore process. Each
      # of these can be set to 0 to disable the respective operation.
      #
      # If BlobTrashLifetime is zero, "trash" and "delete trash"
      # happen at once, so only the lower of these two values is used.
      BlobTrashConcurrency: 4
      BlobDeleteConcurrency: 4

      # Maximum number of concurrent "create additional replica of
      # existing blob" operations conducted by a single keepstore
      # process.
      BlobReplicateConcurrency: 4

      # Default replication level for collections. This is used when a
      # collection's replication_desired attribute is nil.
      DefaultReplication: 2

      # BlobSigningTTL determines the minimum lifetime of transient
      # data, i.e., blocks that are not referenced by
      # collections. Unreferenced blocks exist for two reasons:
      #
      # 1) A data block must be written to a disk/cloud backend device
      # before a collection can be created/updated with a reference to
      # it.
      #
      # 2) Deleting or updating a collection can remove the last
      # remaining reference to a data block.
      #
      # If BlobSigningTTL is too short, long-running
      # processes/containers will fail when they take too long (a)
      # between writing blocks and writing collections that reference
      # them, or (b) between reading collections and reading the
      # referenced blocks.
      #
      # If BlobSigningTTL is too long, data will still be stored long
      # after the referring collections are deleted, and you will
      # needlessly fill up disks or waste money on cloud storage.
      #
      # Modifying BlobSigningTTL invalidates existing signatures; see
      # BlobSigningKey note above.
      #
      # The default is 2 weeks.
      BlobSigningTTL: 336h

      # When running keep-balance, this is the destination filename for
      # the list of lost block hashes if there are any, one per line.
      # Updated atomically during each successful run.
      BlobMissingReport: ""

      # keep-balance operates periodically, i.e.: do a
      # scan/balance operation, sleep, repeat.
      #
      # BalancePeriod determines the interval between start times of
      # successive scan/balance operations. If a scan/balance operation
      # takes longer than BalancePeriod, the next one will follow it
      # immediately.
      #
      # If SIGUSR1 is received during an idle period between operations,
      # the next operation will start immediately.
      BalancePeriod: 6h

      # Limits the number of collections retrieved by keep-balance per
      # API transaction. If this is zero, page size is
      # determined by the API server's own page size limits (see
      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
      BalanceCollectionBatch: 0

      # The size of keep-balance's internal queue of
      # collections. Higher values may improve throughput by allowing
      # keep-balance to fetch collections from the database while the
      # current collections are still being processed, at the expense of
      # using more memory.  If this is zero or omitted, pages are
      # processed serially.
      BalanceCollectionBuffers: 4

      # Maximum time for a rebalancing run. This ensures keep-balance
      # eventually gives up and retries if, for example, a network
      # error causes a hung connection that is never closed by the
      # OS. It should be long enough that it doesn't interrupt a
      # long-running balancing operation.
      BalanceTimeout: 6h

      # Maximum number of replication_confirmed /
      # storage_classes_confirmed updates to write to the database
      # after a rebalancing run. When many updates are needed, this
      # spreads them over a few runs rather than applying them all at
      # once.
      BalanceUpdateLimit: 100000

      # Maximum number of "pull block from other server" and "trash
      # block" requests to send to each keepstore server at a
      # time. Smaller values use less memory in keepstore and
      # keep-balance. Larger values allow more progress per
      # keep-balance iteration. A zero value computes all of the
      # needed changes but does not apply any.
      BalancePullLimit: 100000
      BalanceTrashLimit: 100000

      # Default lifetime for ephemeral collections: 2 weeks. This must not
      # be less than BlobSigningTTL.
      DefaultTrashLifetime: 336h

      # Interval (seconds) between trash sweeps. During a trash sweep,
      # collections are marked as trash if their trash_at time has
      # arrived, and deleted if their delete_at time has arrived.
      TrashSweepInterval: 60s

      # If true, enable collection versioning.
      # When a collection's preserve_version field is true or the current version
      # is older than the number of seconds defined in PreserveVersionIfIdle,
      # a snapshot of the collection's previous state is created and linked to
      # the current collection.
      CollectionVersioning: true

      #   0s = auto-create a new version on every update.
      #  -1s = never auto-create new versions.
      # > 0s = auto-create a new version when older than the specified number of seconds.
      PreserveVersionIfIdle: 10s

      # If non-empty, allow project and collection names to contain
      # the "/" character (slash/stroke/solidus), and replace "/" with
      # the given string in the filesystem hierarchy presented by
      # WebDAV. Example values are "%2f" and "{slash}". Names that
      # contain the substitution string itself may result in confusing
      # behavior, so a value like "_" is not recommended.
      #
      # If the default empty value is used, the server will reject
      # requests to create or rename a collection when the new name
      # contains "/".
      #
      # If the value "/" is used, project and collection names
      # containing "/" will be allowed, but they will not be
      # accessible via WebDAV.
      #
      # Use of this feature is not recommended, if it can be avoided.
      ForwardSlashNameSubstitution: ""

      # Include "folder objects" in S3 ListObjects responses.
      S3FolderObjects: true

      # Managed collection properties. At creation time, if the client didn't
      # provide the listed keys, they will be automatically populated following
      # one of the following behaviors:
      #
      # * UUID of the user who owns the containing project.
      #   responsible_person_uuid: {Function: original_owner, Protected: true}
      #
      # * Default concrete value.
      #   foo_bar: {Value: baz, Protected: false}
      #
      # If Protected is true, only an admin user can modify its value.
      ManagedProperties:
        SAMPLE: {Function: original_owner, Protected: true}

      # In "trust all content" mode, Workbench will redirect download
      # requests to the WebDAV preview link, even in the cases when
      # WebDAV would have to expose XSS vulnerabilities in order to
      # handle the redirect (see discussion on Services.WebDAV).
      #
      # This setting has no effect in the recommended configuration, where the
      # WebDAV service is configured to have a separate domain for every
      # collection and XSS protection is provided by browsers' same-origin
      # policy.
      #
      # The default setting (false) is appropriate for a multi-user site.
      TrustAllContent: false

      # Cache parameters for WebDAV content serving.
      WebDAVCache:
        # Time to cache manifests, permission checks, and sessions.
        TTL: 300s

        # Maximum amount of data cached in /var/cache/arvados/keep.
        # Can be given as a percentage ("10%") or a number of bytes
        # ("10 GiB").
        DiskCacheSize: 10%

        # Approximate memory limit (in bytes) for session cache.
        #
        # Note this applies to the in-memory representation of
        # projects and collections -- metadata, block locators,
        # filenames, etc. -- not the file data itself (see
        # DiskCacheSize).
        MaxCollectionBytes: 100 MB

        # Maximum number of persistent sessions.
        #
        # NOTE(review): presumably this caps the number of entries in
        # the session cache described above -- confirm.
        MaxSessions: 100

      # Selectively set permissions for regular users and admins to
      # download or upload data files using the upload/download
      # features for Workbench, WebDAV and S3 API support.
      WebDAVPermission:
        User:
          Download: true
          Upload: true
        Admin:
          Download: true
          Upload: true

      # Selectively set permissions for regular users and admins to be
      # able to download or upload blocks using arv-put and
      # arv-get from outside the cluster.
      KeepproxyPermission:
        User:
          Download: true
          Upload: true
        Admin:
          Download: true
          Upload: true

      # Post upload / download events to the API server logs table, so
      # that they can be included in the arv-user-activity report.
      # You can disable this if you find that it is creating excess
      # load on the API server and you don't need it.
      WebDAVLogEvents: true

    Login:
      # One of the following mechanisms (Google, OpenIDConnect, PAM,
      # LDAP, or LoginCluster) should be enabled; see
      # https://doc.arvados.org/install/setup-login.html

      Google:
        # Authenticate with Google.
        Enable: false

        # Use the Google Cloud console to enable the People API (APIs
        # and Services > Enable APIs and services > Google People API
        # > Enable), generate a Client ID and secret (APIs and
        # Services > Credentials > Create credentials > OAuth client
        # ID > Web application) and add your controller's /login URL
        # (e.g., "https://zzzzz.example.com/login") as an authorized
        # redirect URL.
        ClientID: ""
        ClientSecret: ""

        # Allow users to log in to existing accounts using any verified
        # email address listed by their Google account. If true, the
        # Google People API must be enabled in order for Google login to
        # work. If false, only the primary email address will be used.
        AlternateEmailAddresses: true

        # Send additional parameters with authentication requests. See
        # https://developers.google.com/identity/protocols/oauth2/openid-connect#authenticationuriparameters
        # for a list of supported parameters.
        AuthenticationRequestParameters:
          # Show the "choose which Google account" page, even if the
          # client is currently logged in to exactly one Google
          # account.
          prompt: select_account

          SAMPLE: ""

      OpenIDConnect:
        # Authenticate with an OpenID Connect provider.
        Enable: false

        # Issuer URL, e.g., "https://login.example.com".
        #
        # This must be exactly equal to the URL returned by the issuer
        # itself in its config response ("issuer" key). If the
        # configured value is "https://example" and the provider
        # returns "https://example:443" or "https://example/" then
        # login will fail, even though those URLs are equivalent
        # (RFC3986).
        Issuer: ""

        # Your client ID and client secret (supplied by the provider).
        ClientID: ""
        ClientSecret: ""

        # OpenID claim field containing the user's email
        # address. Normally "email"; see
        # https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims
        EmailClaim: "email"

        # OpenID claim field containing the email verification
        # flag. Normally "email_verified".  To accept every returned
        # email address without checking a "verified" field at all,
        # use an empty string "".
        EmailVerifiedClaim: "email_verified"

        # OpenID claim field containing the user's preferred
        # username. If empty, use the mailbox part of the user's email
        # address.
        UsernameClaim: ""

        # Send additional parameters with authentication requests,
        # like {display: page, prompt: consent}. See
        # https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest
        # and refer to your provider's documentation for supported
        # parameters.
        AuthenticationRequestParameters:
          SAMPLE: ""

        # Accept an OIDC access token as an API token if the OIDC
        # provider's UserInfo endpoint accepts it.
        #
        # AcceptAccessTokenScope should also be used when enabling
        # this feature.
        AcceptAccessToken: false

        # Before accepting an OIDC access token as an API token, first
        # check that it is a JWT whose "scope" value includes this
        # value. Example: "https://zzzzz.example.com/" (your Arvados
        # API endpoint).
        #
        # If this value is empty and AcceptAccessToken is true, all
        # access tokens will be accepted regardless of scope,
        # including non-JWT tokens. This is not recommended.
        AcceptAccessTokenScope: ""

      PAM:
        # Use PAM to authenticate users.
        Enable: false

        # PAM service name. PAM will apply the policy in the
        # corresponding config file (e.g., /etc/pam.d/arvados) or, if
        # there is none, the default "other" config.
        Service: arvados

        # Domain name (e.g., "example.com") to use to construct the
        # user's email address if PAM authentication returns a
        # username with no "@". If empty, use the PAM username as the
        # user's email address, whether or not it contains "@".
        #
        # Note that the email address is used as the primary key for
        # user records when logging in. Therefore, if you change
        # DefaultEmailDomain after the initial installation, you
        # should also update existing user records to reflect the new
        # domain. Otherwise, next time those users log in, they will
        # be given new accounts instead of accessing their existing
        # accounts.
        DefaultEmailDomain: ""

      LDAP:
        # Use an LDAP service to authenticate users.
        Enable: false

        # Server URL, like "ldap://ldapserver.example.com:389" or
        # "ldaps://ldapserver.example.com:636".
        URL: "ldap://ldap:389"

        # Use StartTLS upon connecting to the server.
        StartTLS: true

        # Skip TLS certificate name verification.
        InsecureTLS: false

        # Minimum TLS version to negotiate when connecting to server
        # (ldaps://... or StartTLS). It may be necessary to set this
        # to "1.1" for compatibility with older LDAP servers that fail
        # with 'LDAP Result Code 200 "Network Error": TLS handshake
        # failed (tls: server selected unsupported protocol version
        # 301)'.
        #
        # If blank, use the recommended minimum version (1.2).
        MinTLSVersion: ""

        # Strip the @domain part if a user supplies an email-style
        # username with this domain. If "*", strip any user-provided
        # domain. If "", never strip the domain part. Example:
        # "example.com"
        StripDomain: ""

        # If, after applying StripDomain, the username contains no "@"
        # character, append this domain to form an email-style
        # username. Example: "example.com"
        AppendDomain: ""

        # The LDAP attribute to filter on when looking up a username
        # (after applying StripDomain and AppendDomain).
        SearchAttribute: uid

        # Bind with this username (DN or UPN) and password when
        # looking up the user record.
        #
        # Example user: "cn=admin,dc=example,dc=com"
        SearchBindUser: ""
        SearchBindPassword: ""

        # Directory base for username lookup. Example:
        # "ou=Users,dc=example,dc=com"
        SearchBase: ""

        # Additional filters to apply when looking up users' LDAP
        # entries. This can be used to restrict access to a subset of
        # LDAP users, or to disambiguate users from other directory
        # entries that have the SearchAttribute present.
        #
        # Special characters in assertion values must be escaped (see
        # RFC4515).
        #
        # Example: "(objectClass=person)"
        SearchFilters: ""

        # LDAP attribute to use as the user's email address.
        #
        # Important: This must not be an attribute whose value can be
        # edited in the directory by the users themselves. Otherwise,
        # users can take over other users' Arvados accounts trivially
        # (email address is the primary key for Arvados accounts.)
        EmailAttribute: mail

        # LDAP attribute to use as the preferred Arvados username. If
        # no value is found (or this config is empty) the username
        # originally supplied by the user will be used.
        UsernameAttribute: uid

      Test:
        # Authenticate users listed here in the config file. This
        # feature is intended to be used in test environments, and
        # should not be used in production.
        Enable: false
        Users:
          SAMPLE:
            Email: alice@example.com
            Password: xyzzy

      # The ID of the cluster to which the user database is
      # delegated.  When set, logins on this cluster will be
      # redirected to the login cluster (which must appear in
      # RemoteClusters with Proxy: true).
      LoginCluster: ""

      # How long a cached token belonging to a remote cluster will
      # remain valid before it needs to be revalidated.
      RemoteTokenRefresh: 5m

      # How long a client token created from a login flow will be valid without
      # asking the user to re-login. Example values: 60m, 8h.
      # Default value zero means tokens don't have expiration.
      TokenLifetime: 0s

      # If true (default), tokens issued through login are allowed to
      # create new tokens.
      # If false, tokens issued through login are not allowed to view
      # or create other tokens.  New tokens can only be created by
      # going through login again.
      IssueTrustedTokens: true

      # Origins (scheme://host[:port]) of clients trusted to receive
      # new tokens via login process.  The ExternalURLs of the local
      # Workbench1 and Workbench2 are trusted implicitly and do not
      # need to be listed here.  If this is a LoginCluster, you
      # probably want to include the other Workbench instances in the
      # federation in this list.
      #
      # A wildcard like "https://*.example" will match client URLs
      # like "https://a.example" and "https://a.b.c.example".
      #
      # Example:
      #
      # TrustedClients:
      #   "https://workbench.other-cluster.example": {}
      #   "https://workbench2.other-cluster.example": {}
      TrustedClients:
        SAMPLE: {}

      # Treat any origin whose host part is "localhost" or a private
      # IP address (e.g., http://10.0.0.123:3000/) as if it were
      # listed in TrustedClients.
      #
      # Intended only for test/development use. Not appropriate for
      # production use.
      TrustPrivateNetworks: false

    Git:
      # Path to git or gitolite-shell executable. Each authenticated
      # request will execute this program with the single argument "http-backend".
      GitCommand: /usr/bin/git

      # Path to Gitolite's home directory. If a non-empty path is given,
      # the CGI environment will be set up to support the use of
      # gitolite-shell as a GitCommand: for example, if GitoliteHome is
      # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,
      # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.
      GitoliteHome: ""

      # Git repositories must be readable by api server, or you won't be
      # able to submit crunch jobs. To pass the test suites, put a clone
      # of the arvados tree in {git_repositories_dir}/arvados.git or
      # {git_repositories_dir}/arvados/.git
      Repositories: /var/lib/arvados/git/repositories

    TLS:
      # Use "file:///var/lib/acme/live/example.com/cert" and
      # ".../privkey" to load externally managed certificates.
      Certificate: ""
      Key: ""

      # Accept invalid certificates when connecting to servers. Never
      # use this in production.
      Insecure: false

      ACME:
        # Obtain certificates automatically for ExternalURL domains
        # using an ACME server and http-01 validation.
        #
        # To use Let's Encrypt, specify "LE".  To use the Let's
        # Encrypt staging environment, specify "LE-staging".  To use a
        # different ACME server, specify the full directory URL
        # ("https://...").
        #
        # Note: this feature is not yet implemented in released
        # versions, only in the alpha/prerelease arvados-server-easy
        # package.
        #
        # Implies agreement with the server's terms of service.
        Server: ""

    Containers:
      # List of supported Docker Registry image formats that compute nodes
      # are able to use. `arv keep docker` will error out if a user tries
      # to store an image with an unsupported format. Use an empty map
      # to skip the compatibility check (and display a warning message to
      # that effect).
      #
      # Example for sites running docker < 1.10: {"v1": {}}
      # Example for sites running docker >= 1.10: {"v2": {}}
      # Example for disabling check: {}
      SupportedDockerImageFormats:
        "v2": {}
        SAMPLE: {}

      # Include details about job reuse decisions in the server log. This
      # causes additional database queries to run, so it should not be
      # enabled unless you expect to examine the resulting logs for
      # troubleshooting purposes.
      LogReuseDecisions: false

      # Default value for keep_cache_ram of a container's
      # runtime_constraints.  Note: this gets added to the RAM request
      # used to allocate a VM or submit an HPC job.
      #
      # If this is zero, container requests that don't specify RAM or
      # disk cache size will use a disk cache, sized to the
      # container's RAM requirement (but with minimum 2 GiB and
      # maximum 32 GiB).
      #
      # Note: If you change this value, containers that used the previous
      # default value will only be reused by container requests that
      # explicitly specify the previous value in their keep_cache_ram
      # runtime constraint.
      DefaultKeepCacheRAM: 0

      # Number of times a container can be unlocked before being
      # automatically cancelled.
      MaxDispatchAttempts: 10

      # Default value for container_count_max for container requests.  This is the
      # number of times Arvados will create a new container to satisfy a container
      # request.  If a container is cancelled it will retry a new container if
      # container_count < container_count_max on any container requests associated
      # with the cancelled container.
      MaxRetryAttempts: 3

      # Schedule all child containers on preemptible instances (e.g. AWS
      # Spot Instances) even if not requested by the submitter.
      #
      # If false, containers are scheduled on preemptible instances
      # only when requested by the submitter.
      #
      # This flag is ignored if no preemptible instance types are
      # configured, and has no effect on top-level containers.
      AlwaysUsePreemptibleInstances: false

      # Automatically add a preemptible variant for every
      # non-preemptible entry in InstanceTypes below. The maximum bid
      # price for the preemptible variant will be the non-preemptible
      # price multiplied by PreemptiblePriceFactor. If 0, preemptible
      # variants are not added automatically.
      #
      # A price factor of 1.0 is a reasonable starting point.
      PreemptiblePriceFactor: 0

      # When the lowest-priced instance type for a given container is
      # not available, try other instance types, up to the indicated
      # maximum price factor.
      #
      # For example, with MaximumPriceFactor 1.5, if the
      # lowest-cost instance type A suitable for a given container
      # costs $2/h, Arvados may run the container on any instance type
      # B costing $3/h or less when instance type A is not available
      # or an idle instance of type B is already running.
      MaximumPriceFactor: 1.5

      # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
      # cloud dispatcher for executing containers on worker VMs.
      # Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
      # and ends with "\n-----END RSA PRIVATE KEY-----\n".
      #
      # Use "file:///absolute/path/to/key" to load the key from a
      # separate file instead of embedding it in the configuration
      # file.
      DispatchPrivateKey: ""

      # Maximum time to wait for workers to come up before abandoning
      # stale locks from a previous dispatch process.
      StaleLockTimeout: 1m

      # The crunch-run command used to start a container on a worker node.
      #
      # When dispatching to cloud VMs, this is used only if
      # DeployRunnerBinary in the CloudVMs section is set to the empty
      # string.
      CrunchRunCommand: "crunch-run"

      # Extra arguments to add to crunch-run invocation
      # Example: ["--cgroup-parent-subsystem=memory"]
      CrunchRunArgumentsList: []

      # Extra RAM to reserve on the node, in addition to
      # the amount specified in the container's RuntimeConstraints
      ReserveExtraRAM: 550MiB

      # Minimum time between two attempts to run the same container
      MinRetryPeriod: 0s

      # Container runtime: "docker" (default) or "singularity"
      RuntimeEngine: docker

      # When running a container, run a dedicated keepstore process,
      # using the specified number of 64 MiB memory buffers per
      # allocated CPU core (VCPUs in the container's runtime
      # constraints). The dedicated keepstore handles I/O for
      # collections mounted in the container, as well as saving
      # container logs.
      #
      # A zero value disables this feature.
      #
      # In order for this feature to be activated, no volume may use
      # AccessViaHosts, and no writable volume may have Replication
      # lower than Collections.DefaultReplication. If these
      # requirements are not satisfied, the feature is disabled
      # automatically regardless of the value given here.
      #
      # When an HPC dispatcher is in use (see SLURM and LSF sections),
      # this feature depends on the operator to ensure an up-to-date
      # cluster configuration file (/etc/arvados/config.yml) is
      # available on all compute nodes. If it is missing or not
      # readable by the crunch-run user, the feature will be disabled
      # automatically. To read it from a different location, add a
      # "-config=/path/to/config.yml" argument to
      # CrunchRunArgumentsList above.
      #
      # When the cloud dispatcher is in use (see CloudVMs section) and
      # this configuration is enabled, the entire cluster
      # configuration file, including the system root token, is copied
      # to the worker node and held in memory for the duration of the
      # container.
      LocalKeepBlobBuffersPerVCPU: 1

      # When running a dedicated keepstore process for a container
      # (see LocalKeepBlobBuffersPerVCPU), write keepstore log
      # messages to keepstore.txt in the container's log collection.
      #
      # These log messages can reveal some volume configuration
      # details, error messages from the cloud storage provider, etc.,
      # which are not otherwise visible to users.
      #
      # Accepted values:
      # * "none" -- no keepstore.txt file
      # * "all" -- all logs, including request and response lines
      # * "errors" -- all logs except "response" logs with 2xx
      #   response codes and "request" logs
      LocalKeepLogsToContainerLog: none

      Logging:
        # Periodically (see SweepInterval) Arvados will check for
        # containers that have been finished for at least this long,
        # and delete their stdout, stderr, arv-mount, crunch-run, and
        # crunchstat logs from the logs table.
        MaxAge: 720h

        # How often to delete cached log entries for finished
        # containers (see MaxAge).
        SweepInterval: 12h

        # These two settings control how frequently log events are
        # flushed to the database.  Log lines are buffered until
        # either LogBytesPerEvent has been reached or
        # LogSecondsBetweenEvents has elapsed since the last flush.
        LogBytesPerEvent: 4096
        LogSecondsBetweenEvents: 5s

        # The sample period for throttling logs.
        LogThrottlePeriod: 60s

        # Maximum number of bytes that a job can log over
        # LogThrottlePeriod before being silenced until the end of
        # the period.
        LogThrottleBytes: 65536

        # Maximum number of lines that a job can log over
        # LogThrottlePeriod before being silenced until the end of
        # the period.
        LogThrottleLines: 1024

        # Maximum bytes that may be logged as legacy log events
        # (records posted to the "logs" table). Starting with Arvados
        # 2.7, container live logging has migrated to a new system
        # (polling the container request live log endpoint) and this
        # value should be 0.  As of this writing, the container will
        # still create a single log on the API server, noting that
        # log events are throttled.
        LimitLogBytesPerJob: 0

        # NOTE(review): this setting has no description here.
        # Presumably it is a separate throttle period applied to
        # partial (unterminated) log lines -- confirm.
        LogPartialLineThrottlePeriod: 5s

        # Container logs are written to Keep and saved in a
        # collection, which is updated periodically while the
        # container runs.  This value sets the interval between
        # collection updates.
        LogUpdatePeriod: 30m

        # The log collection is also updated when the specified amount of
        # log data (given in bytes) is produced in less than one update
        # period.
        LogUpdateSize: 32MiB

      ShellAccess:
        # An admin user can use "arvados-client shell" to start an
        # interactive shell (with any user ID) in any running
        # container.
        Admin: false

        # Any user can use "arvados-client shell" to start an
        # interactive shell (with any user ID) in any running
        # container that they started, provided it isn't also
        # associated with a different user's container request.
        #
        # Interactive sessions make it easy to alter the container's
        # runtime environment in ways that aren't recorded or
        # reproducible. Consider the implications for automatic
        # container reuse before enabling and using this feature. In
        # particular, note that starting an interactive session does
        # not disqualify a container from being reused by a different
        # user/workflow in the future.
        User: false

      # Settings for the SLURM HPC dispatcher.
      SLURM:
        # NOTE(review): this setting has no description here; confirm
        # its meaning against the SLURM dispatcher documentation.
        PrioritySpread: 0

        # NOTE(review): presumably extra arguments to pass to sbatch
        # when submitting containers as SLURM jobs (compare
        # LSF.BsubArgumentsList) -- confirm.
        SbatchArgumentsList: []

        # NOTE(review): presumably environment variables (name:
        # value) to set when invoking sbatch -- confirm.
        SbatchEnvironmentVariables:
          SAMPLE: ""
        Managed:
          # NOTE(review): the comments in this section say "If false",
          # but the defaults here are empty strings; presumably an
          # empty string disables the corresponding behavior in the
          # same way -- confirm.

          # Path to dns server configuration directory
          # (e.g. /etc/unbound.d/conf.d). If false, do not write any config
          # files or touch restart.txt (see DNSServerReloadCommand).
          DNSServerConfDir: ""

          # Template file for the dns server host snippets. See
          # unbound.template in this directory for an example. If false, do
          # not write any config files.
          DNSServerConfTemplate: ""

          # String to write to {DNSServerConfDir}/restart.txt (with a
          # trailing newline) after updating local data. If false, do not
          # open or write the restart.txt file.
          DNSServerReloadCommand: ""

          # Command to run after each DNS update. Template variables will be
          # substituted. If false, do not run a command.
          #
          # NOTE(review): earlier text referred to an "unbound" example
          # below, but no such example appears in this section.
          DNSServerUpdateCommand: ""

          # NOTE(review): the next two settings have no description
          # here. Presumably they give the DNS domain of compute nodes
          # and the nameservers (as map keys) they should use -- confirm.
          ComputeNodeDomain: ""
          ComputeNodeNameservers:
            "192.168.1.1": {}
            SAMPLE: {}

          # Hostname to assign to a compute node when it sends a "ping" and the
          # hostname in its Node record is nil.
          # During bootstrapping, the "ping" script is expected to notice the
          # hostname given in the ping response, and update its unix hostname
          # accordingly.
          # If false, leave the hostname alone (this is appropriate if your compute
          # nodes' hostnames are already assigned by some other mechanism).
          #
          # One way or another, the hostnames of your node records should agree
          # with your DNS records and your /etc/slurm-llnl/slurm.conf files.
          #
          # Example for compute0000, compute0001, ....:
          # AssignNodeHostname: "compute%<slot_number>04d"
          # (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
          AssignNodeHostname: "compute%<slot_number>d"

      LSF:
        # Arguments to bsub when submitting Arvados containers as LSF jobs.
        #
        # Template variables starting with % will be substituted as follows:
        #
        # %U uuid
        # %C number of VCPUs
        # %M memory in MB
        # %T tmp in MB
        # %G number of GPU devices (runtime_constraints.cuda.device_count)
        # %W maximum run time in minutes (see MaxRunTimeOverhead and
        #    MaxRunTimeDefault below)
        #
        # Use %% to express a literal %. For example, the %%J in the
        # default argument list will be changed to %J, which is
        # interpreted by bsub itself.
        #
        # Note that the default arguments cause LSF to write two files
        # in /tmp on the compute node each time an Arvados container
        # runs. Ensure you have something in place to delete old files
        # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
        #
        # If ["-We", "%W"] or ["-W", "%W"] appear in this argument
        # list, and MaxRunTimeDefault is not set (see below), both of
        # those arguments will be dropped from the argument list when
        # running a container that has no max_run_time value.
        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]", "-We", "%W"]

        # Arguments that will be appended to the bsub command line
        # when submitting Arvados containers as LSF jobs with
        # runtime_constraints.cuda.device_count > 0
        BsubCUDAArguments: ["-gpu", "num=%G"]

        # Use sudo to switch to this user account when submitting LSF
        # jobs.
        #
        # This account must exist on the hosts where LSF jobs run
        # ("execution hosts"), as well as on the host where the
        # Arvados LSF dispatcher runs ("submission host").
        BsubSudoUser: "crunch"

        # When passing the scheduling_constraints.max_run_time value
        # to LSF via "%W", add this much time to account for
        # crunch-run startup/shutdown overhead.
        MaxRunTimeOverhead: 5m

        # If non-zero, MaxRunTimeDefault is used as the default value
        # for max_run_time for containers that do not specify a time
        # limit.  MaxRunTimeOverhead will be added to this.
        #
        # Example:
        # MaxRunTimeDefault: 2h
        MaxRunTimeDefault: 0

      JobsAPI:
        # Enable the legacy 'jobs' API (crunch v1).  This value must be a string.
        #
        # Note: this only enables read-only access, creating new
        # legacy jobs and pipelines is not supported.
        #
        # 'auto' -- (default) enable the Jobs API only if it has been used before
        #         (i.e., there are job records in the database)
        # 'true' -- enable the Jobs API despite lack of existing records.
        # 'false' -- disable the Jobs API despite presence of existing records.
        Enable: 'auto'

        # Git repositories must be readable by api server, or you won't be
        # able to submit crunch jobs. To pass the test suites, put a clone
        # of the arvados tree in {git_repositories_dir}/arvados.git or
        # {git_repositories_dir}/arvados/.git
        GitInternalDir: /var/lib/arvados/internal.git

      CloudVMs:
        # Enable the cloud scheduler.
        Enable: false

        # Name/number of port where workers' SSH services listen.
        SSHPort: "22"

        # Interval between queue polls.
        PollInterval: 10s

        # Shell command to execute on each worker to determine whether
        # the worker is booted and ready to run containers. It should
        # exit zero if the worker is ready.
        BootProbeCommand: "systemctl is-system-running"

        # Minimum interval between consecutive probes to a single
        # worker.
        ProbeInterval: 10s

        # Maximum probes per second, across all workers in a pool.
        MaxProbesPerSecond: 10

        # Time before repeating SIGTERM when killing a container.
        TimeoutSignal: 5s

        # Time to give up on a process (most likely arv-mount) that
        # still holds a container lockfile after its main supervisor
        # process has exited, and declare the instance broken.
        TimeoutStaleRunLock: 5s

        # Time to give up on SIGTERM and write off the worker.
        TimeoutTERM: 2m

        # Maximum create/destroy-instance operations per second (0 =
        # unlimited).
        MaxCloudOpsPerSecond: 10

        # Maximum concurrent instance creation operations (0 = unlimited).
        #
        # MaxConcurrentInstanceCreateOps limits the number of instance creation
        # requests that can be in flight at any one time, whereas
        # MaxCloudOpsPerSecond limits the number of create/destroy operations
        # that can be started per second.
        #
        # Because the API for instance creation on Azure is synchronous, it is
        # recommended to increase MaxConcurrentInstanceCreateOps when running
        # on Azure. When using managed images, a value of 20 would be
        # appropriate. When using Azure Shared Image Galleries, it could be set
        # higher. For more information, see
        # https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image
        #
        # MaxConcurrentInstanceCreateOps can be increased for other cloud
        # providers too, if desired.
        MaxConcurrentInstanceCreateOps: 1

        # The maximum number of instances to run at a time, or 0 for
        # unlimited.
        #
        # If more instances than this are already running and busy
        # when the dispatcher starts up, the running containers will
        # be allowed to finish before the excess instances are shut
        # down.
        MaxInstances: 64

        # The minimum number of instances expected to be runnable
        # without reaching a provider-imposed quota.
        #
        # This is used as the initial value for the dispatcher's
        # dynamic instance limit, which increases (up to MaxInstances)
        # as containers start up successfully and decreases in
        # response to high API load and cloud quota errors.
        #
        # Setting this to 0 means the dynamic instance limit will
        # start at MaxInstances.
        #
        # Situations where you may want to set this (to a value less
        # than MaxInstances) would be when there is significant
        # variability or uncertainty in the actual cloud resources
        # available.  Upon reaching InitialQuotaEstimate the
        # dispatcher will switch to a more conservative behavior with
        # slower instance start to avoid over-shooting cloud resource
        # limits.
        InitialQuotaEstimate: 0

        # Maximum fraction of available instance capacity allowed to
        # run "supervisor" containers at any given time. A supervisor
        # is a container whose purpose is mainly to submit and manage
        # other containers, such as the arvados-cwl-runner workflow
        # runner.
        #
        # If there is a hard limit on the number of concurrent
        # containers that the cluster can run, it is important to
        # avoid crowding out the containers doing useful work with
        # containers that just create more work.
        #
        # For example, with the default MaxInstances of 64, it will
        # schedule at most floor(64*0.50) = 32 concurrent workflow
        # runners, ensuring 32 slots are available for work.
        SupervisorFraction: 0.50

        # Interval between cloud provider syncs/updates ("list all
        # instances").
        SyncInterval: 1m

        # Time to leave an idle worker running (in case new containers
        # appear in the queue that it can run) before shutting it
        # down.
        TimeoutIdle: 1m

        # Time to wait for a new worker to boot (i.e., pass
        # BootProbeCommand) before giving up and shutting it down.
        TimeoutBooting: 10m

        # Maximum time a worker can stay alive with no successful
        # probes before being automatically shut down.
        TimeoutProbe: 10m

        # Time after shutting down a worker to retry the
        # shutdown/destroy operation.
        TimeoutShutdown: 10s

        # Worker VM image ID.
        # (aws) AMI identifier
        # (azure) managed disks: the name of the managed disk image
        # (azure) shared image gallery: the name of the image definition. Also
        # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
        # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
        # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
        ImageID: ""

        # Shell script to run on new instances using the cloud
        # provider's UserData (EC2) or CustomData (Azure) feature.
        #
        # It is not necessary to include a #!/bin/sh line.
        InstanceInitCommand: ""

        # An executable file (located on the dispatcher host) to be
        # copied to cloud instances at runtime and used as the
        # container runner/supervisor. The default value is the
        # dispatcher program itself.
        #
        # Use an empty string to disable this step: nothing will be
        # copied, and cloud instances are assumed to have a suitable
        # version of crunch-run installed; see CrunchRunCommand above.
        DeployRunnerBinary: "/proc/self/exe"

        # Install the Dispatcher's SSH public key (derived from
        # DispatchPrivateKey) when creating new cloud
        # instances. Change this to false if you are using a different
        # mechanism to pre-install the public key on new instances.
        DeployPublicKey: true

        # Tags to add on all resources (VMs, NICs, disks) created by
        # the container dispatcher. (Arvados's own tags --
        # InstanceType, IdleBehavior, and InstanceSecret -- will also
        # be added.)
        ResourceTags:
          SAMPLE: "tag value"

        # Prefix for predefined tags used by Arvados (InstanceSetID,
        # InstanceType, InstanceSecret, IdleBehavior). With the
        # default value "Arvados", tags are "ArvadosInstanceSetID",
        # "ArvadosInstanceSecret", etc.
        #
        # This should only be changed while no cloud resources are in
        # use and the cloud dispatcher is not running. Otherwise,
        # VMs/resources that were added using the old tag prefix will
        # need to be detected and cleaned up manually.
        TagKeyPrefix: Arvados

        # Cloud driver: "azure" (Microsoft Azure), "ec2" (Amazon AWS),
        # or "loopback" (run containers on dispatch host for testing
        # purposes).
        Driver: ec2

        # Cloud-specific driver parameters.
        DriverParameters:

          # (ec2) Credentials. Omit or leave blank if using IAM role.
          AccessKeyID: ""
          SecretAccessKey: ""

          # (ec2) Instance configuration.

          # (ec2) Region, like "us-east-1".
          Region: ""

          # (ec2) Security group IDs. Omit or use {} to use the
          # default security group.
          SecurityGroupIDs:
            "SAMPLE": {}

          # (ec2) One or more subnet IDs. Omit or leave empty to let
          # AWS choose a default subnet from your default VPC. If
          # multiple subnets are configured here (enclosed in brackets
          # like [subnet-abc123, subnet-def456]) the cloud dispatcher
          # will detect subnet-related errors and retry using a
          # different subnet. Most sites specify one subnet.
          SubnetID: ""

          # (ec2) EBS volume type. The EBSPrice setting below should
          # be the price for this volume type.
          EBSVolumeType: gp2

          # (ec2) name of the IAMInstanceProfile for instances started by
          # the cloud dispatcher. Leave blank when not needed.
          IAMInstanceProfile: ""

          # (ec2) how often to look up spot instance pricing data
          # (only while running spot instances) for the purpose of
          # calculating container cost estimates. A value of 0
          # disables spot price lookups entirely.
          SpotPriceUpdateInterval: 24h

          # (ec2) per-GiB-month cost of EBS volumes. Matches
          # EBSVolumeType. Used to account for AddedScratch when
          # calculating container cost estimates. Note that
          # https://aws.amazon.com/ebs/pricing/ defines GB to mean
          # GiB, so an advertised price $0.10/GB indicates a real
          # price of $0.10/GiB and can be entered here as 0.10.
          EBSPrice: 0.10

          # (azure) Credentials.
          SubscriptionID: ""
          ClientID: ""
          ClientSecret: ""
          TenantID: ""

          # (azure) Instance configuration.
          CloudEnvironment: AzurePublicCloud
          Location: centralus

          # (azure) The resource group where the VM and virtual NIC will be
          # created.
          ResourceGroup: ""

          # (azure) The resource group of the Network to use for the virtual
          # NIC (if different from ResourceGroup)
          NetworkResourceGroup: ""
          Network: ""
          Subnet: ""

          # (azure) managed disks: The resource group where the managed disk
          # image can be found (if different from ResourceGroup).
          ImageResourceGroup: ""

          # (azure) shared image gallery: the name of the gallery
          SharedImageGalleryName: ""
          # (azure) shared image gallery: the version of the image definition
          SharedImageGalleryImageVersion: ""

          # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
          StorageAccount: ""
          BlobContainer: ""

          # (azure) How long to wait before deleting VHD and NIC
          # objects that are no longer being used.
          DeleteDanglingResourcesAfter: 20s

          # Account (that already exists in the VM image) that will be
          # set up with an ssh authorized key to allow the compute
          # dispatcher to connect.
          #
          # AdminUsername must appear only once in DriverParameters:
          # duplicate mapping keys are invalid YAML, and parsers that
          # tolerate them silently keep only the last occurrence.
          AdminUsername: arvados

    InstanceTypes:

      # Use the instance type name as the key (in place of "SAMPLE" in
      # this sample entry).
      SAMPLE:
        # Cloud provider's instance type. Defaults to the configured type name.
        ProviderType: ""
        # NOTE(review): VCPUs and RAM presumably describe the resources
        # offered by this instance type -- confirm.
        VCPUs: 1
        RAM: 128MiB
        # Scratch space sizes. With the ec2 cloud driver, the EBSPrice
        # driver parameter is used to account for AddedScratch when
        # calculating container cost estimates.
        IncludedScratch: 16GB
        AddedScratch: 0
        # Hourly price ($), used to select node types for containers,
        # and to calculate estimated container costs. For spot
        # instances on EC2, this is also used as the maximum price
        # when launching spot instances, while the estimated container
        # cost is computed based on the current spot price according
        # to AWS. On Azure, and on-demand instances on EC2, the price
        # given here is used to compute container cost estimates.
        Price: 0.1
        # NOTE(review): presumably true marks this type as a
        # preemptible (e.g., EC2 spot) instance -- confirm.
        Preemptible: false
        # Include this section if the node type includes GPU (CUDA) support
        CUDA:
          # Version-like values are quoted so that YAML keeps them as
          # strings (an unquoted 11.0 would be read as a float).
          DriverVersion: "11.0"
          HardwareCapability: "9.0"
          DeviceCount: 1

    StorageClasses:

      # If you use multiple storage classes, specify them here, using
      # the storage class name as the key (in place of "SAMPLE" in
      # this sample entry).
      #
      # Further info/examples:
      # https://doc.arvados.org/admin/storage-classes.html
      SAMPLE:

        # Priority determines the order volumes should be searched
        # when reading data, in cases where a keepstore server has
        # access to multiple volumes with different storage classes.
        Priority: 0

        # Default determines which storage class(es) should be used
        # when a user/client writes data or saves a new collection
        # without specifying storage classes.
        #
        # If any StorageClasses are configured, at least one of them
        # must have Default: true.
        Default: true

    Volumes:
      SAMPLE:
        # AccessViaHosts specifies which keepstore processes can read
        # and write data on the volume.
        #
        # For a local filesystem, AccessViaHosts has one entry,
        # indicating which server the filesystem is located on.
        #
        # For a network-attached backend accessible by all keepstore
        # servers, like a cloud storage bucket or an NFS mount,
        # AccessViaHosts can be empty/omitted.
        #
        # Further info/examples:
        # https://doc.arvados.org/install/configure-fs-storage.html
        # https://doc.arvados.org/install/configure-s3-object-storage.html
        # https://doc.arvados.org/install/configure-azure-blob-storage.html
        AccessViaHosts:
          SAMPLE:
            ReadOnly: false
          "http://host1.example:25107": {}
        ReadOnly: false
        # AllowTrashWhenReadOnly enables unused and overreplicated
        # blocks to be trashed/deleted even when ReadOnly is
        # true. Normally, this is false and ReadOnly prevents all
        # trash/delete operations as well as writes.
        AllowTrashWhenReadOnly: false
        Replication: 1
        StorageClasses:
          # If you have configured storage classes (see StorageClasses
          # section above), add an entry here for each storage class
          # satisfied by this volume.
          SAMPLE: true
        Driver: S3
        DriverParameters:
          # for s3 driver -- see
          # https://doc.arvados.org/install/configure-s3-object-storage.html
          IAMRole: aaaaa
          AccessKeyID: aaaaa
          SecretAccessKey: aaaaa
          Endpoint: ""
          Region: us-east-1
          Bucket: aaaaa
          LocationConstraint: false
          V2Signature: false
          IndexPageSize: 1000
          ConnectTimeout: 1m
          ReadTimeout: 10m
          RaceWindow: 24h
          PrefixLength: 0

          # For S3 driver, potentially unsafe tuning parameter,
          # intentionally excluded from main documentation.
          #
          # Enable deletion (garbage collection) even when the
          # configured BlobTrashLifetime is zero.  WARNING: eventual
          # consistency may result in race conditions that can cause
          # data loss.  Do not enable this unless you understand and
          # accept the risk.
          UnsafeDelete: false

          # for azure driver -- see
          # https://doc.arvados.org/install/configure-azure-blob-storage.html
          StorageAccountName: aaaaa
          StorageAccountKey: aaaaa
          StorageBaseURL: core.windows.net
          ContainerName: aaaaa
          RequestTimeout: 30s
          ListBlobsRetryDelay: 10s
          ListBlobsMaxAttempts: 10
          MaxGetBytes: 0
          WriteRaceInterval: 15s
          WriteRacePollTime: 1s

          # for local directory driver -- see
          # https://doc.arvados.org/install/configure-fs-storage.html
          Root: /var/lib/arvados/keep-data

          # For local directory driver, potentially confusing tuning
          # parameter, intentionally excluded from main documentation.
          #
          # When true, read and write operations (for whole 64MiB
          # blocks) on an individual volume will be queued and issued
          # serially.  When false, read and write operations will be
          # issued concurrently.
          #
          # May possibly improve throughput if you have physical spinning disks
          # and experience contention when there are multiple requests
          # to the same volume.
          #
          # Otherwise, when using SSDs, RAID, or a shared network filesystem, you
          # should leave this alone.
          Serialize: false

    Mail:
      # In order to send mail, Arvados expects a default SMTP server
      # on localhost:25.  It cannot require authentication on
      # connections from localhost.  That server should be configured
      # to relay mail to a "real" SMTP server that is able to send
      # email on behalf of your domain.

      # See also the "Users" configuration section for additional
      # email-related options.

      # When a user has been set up (meaning they are able to log in)
      # they will receive an email using the template specified
      # earlier in Users.UserSetupMailText
      SendUserSetupNotificationEmail: true

      # Bug/issue report notification to and from addresses
      IssueReporterEmailFrom: "arvados@example.com"
      IssueReporterEmailTo: "arvados@example.com"
      SupportEmailAddress: "arvados@example.com"

      # Generic issue email from
      EmailFrom: "arvados@example.com"

      # No longer supported, to be removed.
      MailchimpAPIKey: ""
      MailchimpListID: ""
    RemoteClusters:
      # Each setting in this entry is described under SAMPLE below.
      # NOTE(review): the "*" entry presumably supplies the settings
      # used for remote clusters that have no entry of their own --
      # confirm.
      "*":
        Host: ""
        Proxy: false
        Scheme: https
        Insecure: false
        ActivateUsers: false
      SAMPLE:
        # API endpoint host or host:port; default is {id}.arvadosapi.com
        Host: sample.arvadosapi.com

        # Perform a proxy request when a local client requests an
        # object belonging to this remote.
        Proxy: false

        # Default "https". Can be set to "http" for testing.
        Scheme: https

        # Disable TLS verify. Can be set to true for testing.
        Insecure: false

        # When users present tokens issued by this remote cluster, and
        # their accounts are active on the remote cluster, activate
        # them on this cluster too.
        ActivateUsers: false

    Workbench:
      # Workbench1 configs
      Theme: default
      ActivationContactLink: mailto:info@arvados.org
      ArvadosDocsite: https://doc.arvados.org
      ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public
      ShowUserAgreementInline: false

      # Set this configuration to true to avoid providing an easy way for users
      # to share data with unauthenticated users; this may be necessary on
      # installations where strict data access controls are needed.
      DisableSharingURLsUI: false

      # Below is a sample setting of the UserProfileFormFields config parameter.
      # This configuration parameter should be set to either false (to disable) or
      # to a map as shown below.
      # Configure the map of input fields to be displayed in the profile page
      # using the attribute "key" for each of the input fields.
      # This sample shows a configuration with one required and one optional form field.
      # For each of these input fields:
      #   You can specify "Type" as "text" or "select".
      #   List the "Options" to be displayed for each "select" menu.
      #   Set "Required" as "true" for any of these fields to make them required.
      # If any of the required fields are missing in the user's profile, the user will be
      # redirected to the profile page before they can access any Workbench features.
      UserProfileFormFields:
        SAMPLE:
          Type: select
          FormFieldTitle: Best color
          FormFieldDescription: your favorite color
          Required: false
          Position: 1
          Options:
            red: {}
            blue: {}
            green: {}
            SAMPLE: {}

        # exampleTextValue:  # key that will be set in properties
        #   Type: text  #
        #   FormFieldTitle: ""
        #   FormFieldDescription: ""
        #   Required: true
        #   Position: 1
        # exampleOptionsValue:
        #   Type: select
        #   FormFieldTitle: ""
        #   FormFieldDescription: ""
        #   Required: true
        #   Position: 1
        #   Options:
        #     red: {}
        #     blue: {}
        #     yellow: {}

      # Use "UserProfileFormMessage" to configure the message you want
      # to display on the profile page.
      UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'

      SiteName: Arvados Workbench

      # Workbench2 configs
      FileViewersConfigURL: ""

      # Idle time after which the user's session will be auto closed.
      # This feature is disabled when set to zero.
      IdleTimeout: 0s

      # UUID of a collection.  This collection should be shared with
      # all users.  Workbench will look for a file "banner.html" in
      # this collection and display its contents (should be
      # HTML-formatted text) when users first log in to Workbench.
      BannerUUID: ""

      # Workbench welcome screen, this is HTML text that will be
      # incorporated directly onto the page.
      WelcomePageHTML: |
        <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
        <h2>Please log in.</h2>

        <p>If you have never used Arvados Workbench before, logging in
        for the first time will automatically create a new
        account.</p>

        <i>Arvados Workbench uses your information only for
        identification, and does not retrieve any other personal
        information.</i>

      # Workbench screen displayed to inactive users.  This is HTML
      # text that will be incorporated directly onto the page.
      InactivePageHTML: |
        <img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
        <h3>Hi! You're logged in, but...</h3>
        <p>Your account is inactive.</p>
        <p>An administrator must activate your account before you can get
        any further.</p>

      # Connecting to Arvados shell VMs tends to be site-specific.
      # Put any special instructions here. This is HTML text that will
      # be incorporated directly onto the Workbench page.
      SSHHelpPageHTML: |
        <a href="https://doc.arvados.org/user/getting_started/ssh-access-unix.html">Accessing an Arvados VM with SSH</a> (generic instructions).
        Site configurations vary.  Contact your local cluster administrator if you have difficulty accessing an Arvados shell node.

      # Sample text if you are using a "switchyard" ssh proxy.
      # Replace "zzzzz" with your Cluster ID.
      #SSHHelpPageHTML: |
      # <p>Add a section like this to your SSH configuration file ( <i>~/.ssh/config</i>):</p>
      # <pre>Host *.zzzzz
      #  TCPKeepAlive yes
      #  ServerAliveInterval 60
      #  ProxyCommand ssh -p2222 turnout@switchyard.zzzzz.arvadosapi.com -x -a $SSH_PROXY_FLAGS %h
      # </pre>

      # If you are using a switchyard ssh proxy, shell node hostnames
      # may require a special hostname suffix.  In the sample ssh
      # configuration above, this would be ".zzzzz"
      # This is added to the hostname in the "command line" column
      # of the Workbench "shell VMs" page.
      #
      # If your shell nodes are directly accessible by users without a
      # proxy and have fully qualified host names, you should leave
      # this blank.
      SSHHelpHostSuffix: ""

# (Experimental) Restart services automatically when config file
# changes are detected. Only supported by `arvados-server boot` in
# dev/test mode.
AutoReloadConfig: false


Previous: InternalURLs and ExternalURL Next: Arvados upgrade notes

The content of this documentation is licensed under the Creative Commons Attribution-Share Alike 3.0 United States licence.
Code samples in this documentation are licensed under the Apache License, Version 2.0.