Configuration reference

The Arvados configuration is stored at /etc/arvados/config.yml

# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0

# This file provides documentation and default values for all Arvados
# configuration entries.
#
# It is NOT intended to be copied and used as a starting point for a
# site configuration file. If you do that, the "SAMPLE" entries will
# be misinterpreted as real configuration entries, and future upgrades
# will leave your configuration file with stale defaults and
# documentation. Instead, you should create /etc/arvados/config.yml
# with only the entries you want to override, and refer to the latest
# version of this file for documentation.
#
# To show the entire configuration, computed from the current defaults
# and your local site configuration, run `arvados-server config-dump`.

Clusters:
xxxxx:
# Token used internally by Arvados components to authenticate to
# one another. Use a string of at least 50 random alphanumerics.
SystemRootToken: ""

# Token to be included in all healthcheck requests. Disabled by default.
# Server expects request header of the format "Authorization: Bearer xxx"
ManagementToken: ""

Services:

# Each of the service sections below specifies InternalURLs
# (each with optional ListenURL) and ExternalURL.
#
# InternalURLs specify how other Arvados service processes will
# connect to the service. Typically these use internal hostnames
# and high port numbers. Example:
#
# InternalURLs:
# "http://host1.internal.example:12345": {}
# "http://host2.internal.example:12345": {}
#
# ListenURL specifies the address and port the service process's
# HTTP server should listen on, if different from the
# InternalURL itself. Example, using an intermediate TLS proxy:
#
# InternalURLs:
# "https://host1.internal.example":
# ListenURL: "http://10.0.0.7:12345"
#
# When there are multiple InternalURLs configured, the service
# process will try listening on each InternalURL (using
# ListenURL if provided) until one works. If you use a ListenURL
# like "0.0.0.0" which can be bound on any machine, use an
# environment variable
# ARVADOS_SERVICE_INTERNAL_URL=http://host1.internal.example to
# control which entry to use.
#
# ExternalURL specifies how applications/clients will connect to
# the service, regardless of whether they are inside or outside
# the cluster. Example:
#
# ExternalURL: "https://keep.zzzzz.example.com/"
#
# To avoid routing internal traffic through external networks,
# use split-horizon DNS for ExternalURL host names: inside the
# cluster's private network "host.zzzzz.example.com" resolves to
# the host's private IP address, while outside the cluster
# "host.zzzzz.example.com" resolves to the host's public IP
# address (or its external gateway or load balancer).

RailsAPI:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
Controller:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
Websocket:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
Keepbalance:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
DispatchCloud:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
DispatchLSF:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
DispatchSLURM:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
Keepproxy:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
WebDAV:
InternalURLs: {SAMPLE: {ListenURL: ""}}
# Base URL for Workbench inline preview. If blank, use
# WebDAVDownload instead, and disable inline preview.
# If both are empty, downloading collections from workbench
# will be impossible.
#
# It is important to properly configure the download service
# to mitigate cross-site-scripting (XSS) attacks. An HTML page
# can be stored in a collection. If an attacker causes a victim
# to visit that page through Workbench, it will be rendered by
# the browser. If all collections are served at the same
# domain, the browser will consider collections as coming from
# the same origin and having access to the same browsing data,
# enabling malicious Javascript on that page to access Arvados
# on behalf of the victim.
#
# This is mitigated by having separate domains for each
# collection, or limiting preview to circumstances where the
# collection is not accessed with the user's regular
# full-access token.
#
# Serve preview links using uuid or pdh in subdomain
# (requires wildcard DNS and TLS certificate)
# https://*.collections.uuid_prefix.arvadosapi.com
#
# Serve preview links using uuid or pdh in main domain
# (requires wildcard DNS and TLS certificate)
# https://*--collections.uuid_prefix.arvadosapi.com
#
# Serve preview links by setting uuid or pdh in the path.
# This configuration only allows previews of public data or
# collection-sharing links, because these use the anonymous
# user token or the token is already embedded in the URL.
# Other data must be handled as downloads via WebDAVDownload:
# https://collections.uuid_prefix.arvadosapi.com
#
ExternalURL: ""

WebDAVDownload:
InternalURLs: {SAMPLE: {ListenURL: ""}}
# Base URL for download links. If blank, serve links to WebDAV
# with disposition=attachment query param. Unlike preview links,
# browsers do not render attachments, so there is no risk of XSS.
#
# If WebDAVDownload is blank, and WebDAV uses a
# single-origin form, then Workbench will show an error page.
#
# Serve download links by setting uuid or pdh in the path:
# https://download.uuid_prefix.arvadosapi.com
#
ExternalURL: ""

Keepstore:
InternalURLs:
SAMPLE:
ListenURL: ""
# Rendezvous is normally empty/omitted. When changing the
# URL of a Keepstore service, Rendezvous should be set to
# the old URL (with trailing slash omitted) to preserve
# rendezvous ordering.
Rendezvous: ""
ExternalURL: ""
Composer:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
WebShell:
InternalURLs: {SAMPLE: {ListenURL: ""}}
# ShellInABox service endpoint URL for a given VM. If empty, do not
# offer web shell logins.
#
# E.g., using a path-based proxy server to forward connections to shell hosts:
# https://webshell.uuid_prefix.arvadosapi.com
#
# E.g., using a name-based proxy server to forward connections to shell hosts:
# https://*.webshell.uuid_prefix.arvadosapi.com
ExternalURL: ""
Workbench1:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
Workbench2:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
Health:
InternalURLs: {SAMPLE: {ListenURL: ""}}
ExternalURL: ""
ContainerWebServices:
InternalURLs: {SAMPLE: {ListenURL: ""}}
# URL used to make HTTP requests that are proxied to
# containers (which may host web apps or APIs). Requires
# wildcard DNS and TLS certificate.
#
# From a subdomain:
# https://*.containers.uuid_prefix.arvadosapi.com
#
# From the main domain:
# https://*--containers.uuid_prefix.arvadosapi.com
ExternalURL: ""
# If ExternalPortMin and ExternalPortMax are non-zero, and
# ExternalURL is not a wildcard, container services listed in
# published_ports can be reached via dynamically assigned
# ports in the range [ExternalPortMin, ExternalPortMax]. For
# example, if ExternalURL is https://example.com/ and
# ExternalPortMin is 8000, Arvados will dynamically assign
# https://example.com:8000/, https://example.com:8001/, etc.,
# as proxy addresses for services in running containers.
ExternalPortMin: 0
ExternalPortMax: 0

PostgreSQL:
# max concurrent connections per arvados server daemon
ConnectionPool: 32
Connection:
# All parameters here are passed to the PG client library in a connection string;
# see https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
host: ""
port: ""
user: ""
password: ""
dbname: ""
SAMPLE: ""
API:
# Maximum lifetime of a client token created by a regular user. This
# is also used as the default expiration policy when no expiration
# date is specified.
# Default value zero means token expirations don't get clamped and no
# default expiration is set.
MaxTokenLifetime: 0s

# Maximum size (in bytes) allowed for a single API request. This
# limit is published in the discovery document for use by clients.
# Note: You must separately configure the upstream web server or
# proxy to actually enforce the desired maximum request size on the
# server side.
MaxRequestSize: 134217728

# Limit the number of bytes read from the database during an index
# request (by retrieving and returning fewer rows than would
# normally be returned in a single response).
# Note 1: This setting never reduces the number of returned rows to
# zero, no matter how big the first data row is.
# Note 2: Only columns that *can* grow large count against this limit.
# Small fixed-width columns like UUIDs and datetimes never do.
MaxIndexDatabaseRead: 134217728

# Maximum number of items to return when responding to APIs that
# can return partial result sets using limit and offset parameters
# (e.g., *.index, groups.contents). If a request specifies a "limit"
# parameter higher than this value, this value is used instead.
MaxItemsPerResponse: 1000

# Maximum number of requests to process concurrently
# in a single service process, or 0 for no limit.
#
# Note this applies to all Arvados services (controller, webdav,
# websockets, etc.). Concurrency in the controller service is
# also effectively limited by MaxConcurrentRailsRequests (see
# below) because most controller requests proxy through to the
# RailsAPI service.
#
# HTTP proxies and load balancers downstream of arvados services
# should be configured to allow at least {MaxConcurrentRequests +
# MaxQueuedRequests + MaxGatewayTunnels} concurrent requests.
MaxConcurrentRequests: 64

# Maximum number of requests to process concurrently
# in a single RailsAPI service process, or 0 for no limit.
MaxConcurrentRailsRequests: 16

# Maximum number of incoming requests to hold in a priority
# queue waiting for one of the MaxConcurrentRequests slots to be
# free. When the queue is longer than this, respond 503 to the
# lowest priority request.
#
# If MaxQueuedRequests is 0, respond 503 immediately to
# additional requests while at the MaxConcurrentRequests limit.
MaxQueuedRequests: 128

# Maximum time a "lock container" request is allowed to wait in
# the incoming request queue before returning 503.
MaxQueueTimeForLockRequests: 2s

# Maximum number of active gateway tunnel connections. One slot
# is consumed by each "container shell" connection. If using an
# HPC dispatcher (LSF or Slurm), one slot is consumed by each
# running container. These do not count toward
# MaxConcurrentRequests.
MaxGatewayTunnels: 1000

# Maximum number of 64MiB memory buffers per Keepstore server process, or
# 0 for no limit. When this limit is reached, up to
# (MaxConcurrentRequests - MaxKeepBlobBuffers) HTTP requests requiring
# buffers (like GET and PUT) will wait for buffer space to be released.
# Any HTTP requests beyond MaxConcurrentRequests will receive an
# immediate 503 response.
#
# MaxKeepBlobBuffers should be set such that (MaxKeepBlobBuffers * 64MiB
# * 1.1) fits comfortably in memory. On a host dedicated to running
# Keepstore, divide total memory by 88MiB to suggest a suitable value.
# For example, if grep MemTotal /proc/meminfo reports MemTotal: 7125440
# kB, compute 7125440 / (88 * 1024)=79 and set MaxKeepBlobBuffers: 79
MaxKeepBlobBuffers: 128

# API methods to disable. Disabled methods are not listed in the
# discovery document, and respond 404 to all requests.
# Example: {"jobs.create":{}, "pipeline_instances.create": {}}
DisabledAPIs: {}

# Interval (seconds) between asynchronous permission view updates. Any
# permission-updating API called with the 'async' parameter schedules an
# update on the permission view in the future, if not already scheduled.
AsyncPermissionsUpdateInterval: 20s

# Maximum number of concurrent outgoing requests to make while
# serving a single incoming multi-cluster (federated) request.
MaxRequestAmplification: 4

# Maximum wall clock time to spend handling an incoming request.
RequestTimeout: 5m

# Websocket will send a periodic empty event after 'SendTimeout'
# if there is no other activity to maintain the connection /
# detect dropped connections.
SendTimeout: 60s

WebsocketClientEventQueue: 64
WebsocketServerEventQueue: 4

# Timeout on requests to internal Keep services.
KeepServiceRequestTimeout: 15s

# Vocabulary file path, local to the node running the controller.
# This JSON file should contain the description of what's allowed
# as object's metadata. Its format is described at:
# https://doc.arvados.org/admin/metadata-vocabulary.html
VocabularyPath: ""

# If true, a project must have a non-empty description field in
# order to be frozen.
FreezeProjectRequiresDescription: false

# Project properties that must have non-empty values in order to
# freeze a project. Example: "property_name": {}
FreezeProjectRequiresProperties:
SAMPLE: {}

# If true, only an admin user can un-freeze a project. If false,
# any user with "manage" permission can un-freeze.
UnfreezeProjectRequiresAdmin: false

# (Experimental) Use row-level locking on update API calls.
LockBeforeUpdate: false

Users:
# Config parameters to automatically setup new users. If enabled,
# new users will be able to self-activate. Enable this if you want
# to run an open instance where anyone can create an account and use
# the system without requiring manual approval.
#
# The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
# AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
AutoSetupNewUsers: false
AutoSetupNewUsersWithVmUUID: ""
AutoSetupUsernameBlacklist:
arvados: {}
git: {}
gitolite: {}
gitolite-admin: {}
root: {}
syslog: {}
SAMPLE: {}

# When NewUsersAreActive is set to true, new users will be active
# immediately. This skips the "self-activate" step which enforces
# user agreements. Should only be enabled for development.
NewUsersAreActive: false

# Newly activated users (whether set up by an admin or via
# AutoSetupNewUsers) immediately become visible to other active
# users.
#
# On a multi-tenant cluster, where the intent is for users to be
# invisible to one another unless they have been added to the
# same group(s) via Workbench admin interface, change this to
# false.
ActivatedUsersAreVisibleToOthers: true

# If a user creates an account with this email address, they
# will be automatically set to admin.
AutoAdminUserWithEmail: ""

# If AutoAdminFirstUser is set to true, the first user to log in when no
# other admin users exist will automatically become an admin user.
AutoAdminFirstUser: false

# Support email address to display in Workbench.
SupportEmailAddress: "arvados@example.com"

# Outgoing email configuration:
#
# In order to send mail, Arvados expects a default SMTP server
# on localhost:25. It cannot require authentication on
# connections from localhost. That server should be configured
# to relay mail to a "real" SMTP server that is able to send
# email on behalf of your domain.

# Recipient for notification email sent out when a user sets a
# profile on their account.
UserProfileNotificationAddress: ""

# When sending a NewUser, NewInactiveUser, or UserProfile
# notification, this is the 'From' address to use
AdminNotifierEmailFrom: arvados@example.com

# Prefix for email subjects for NewUser and NewInactiveUser emails
EmailSubjectPrefix: "[ARVADOS] "

# When sending a welcome email to the user, the 'From' address to use
UserNotifierEmailFrom: arvados@example.com

# The welcome email sent to new users will be blind copied to
# these addresses.
UserNotifierEmailBcc:
SAMPLE: {}

# Recipients for notification email sent out when a user account
# is created and already set up to be able to log in
NewUserNotificationRecipients:
SAMPLE: {}

# Recipients for notification email sent out when a user account
# has been created but the user cannot log in until they are
# set up by an admin.
NewInactiveUserNotificationRecipients:
SAMPLE: {}

# Set AnonymousUserToken to enable anonymous user access. Populate this
# field with a random string at least 50 characters long.
AnonymousUserToken: ""

# The login provider for a user may supply a primary email
# address and one or more alternate email addresses. If a new
# user has an alternate email address with the domain given
# here, use the username from the alternate email to generate
# the user's Arvados username. Otherwise, the username from the
# user's primary email address is used for the Arvados username.
# Currently implemented for OpenID Connect only.
PreferDomainForUsername: ""

# Send an email to each user when their account has been set up
# (meaning they are able to log in).
SendUserSetupNotificationEmail: false

# Ruby ERB template used for the email sent out to users when
# they have been set up.
UserSetupMailText: |
<% if not @user.full_name.empty? -%>
<%= @user.full_name %>,
<% else -%>
Hi there,
<% end -%>

Your Arvados account has been set up. You can log in at

<%= Rails.configuration.Services.Workbench1.ExternalURL %>

Thanks,
Your Arvados administrator.

# If RoleGroupsVisibleToAll is true, all role groups are visible
# to all active users.
#
# If false, users must be granted permission to role groups in
# order to see them. This is more appropriate for a multi-tenant
# cluster.
RoleGroupsVisibleToAll: true

# If CanCreateRoleGroups is true, regular (non-admin) users can
# create new role groups.
#
# If false, only admins can create new role groups.
CanCreateRoleGroups: true

# During each period, a log entry with event_type="activity"
# will be recorded for each user who is active during that
# period. The object_uuid attribute will indicate the user's
# UUID.
#
# Multiple log entries for the same user may be generated during
# a period if there are multiple controller processes or a
# controller process is restarted.
#
# Use 0 to disable activity logging.
ActivityLoggingPeriod: 24h

# The SyncUser* options control what system resources are managed by
# arvados-login-sync on shell nodes. They correspond to:
# * SyncUserAccounts: The user's Unix account on the shell node
# * SyncUserGroups: The group memberships of that account
# * SyncUserSSHKeys: Whether to authorize the user's Arvados SSH keys
# * SyncUserAPITokens: Whether to set up the user's Arvados API token
# All default to true.
SyncUserAccounts: true
SyncUserGroups: true
SyncUserSSHKeys: true
SyncUserAPITokens: true

# If SyncUserGroups=true, then arvados-login-sync will ensure that all
# managed accounts are members of the Unix groups listed in
# SyncRequiredGroups, in addition to any groups listed in their Arvados
# login permission. The default list includes the "fuse" group so
# users can use arv-mount. You can require no groups by specifying an
# empty list (i.e., `SyncRequiredGroups: []`).
SyncRequiredGroups:
- fuse

# SyncIgnoredGroups is a list of group names. arvados-login-sync will
# never modify these groups. If user login permissions list any groups
# in SyncIgnoredGroups, they will be ignored. If a user's Unix account
# belongs to any of these groups, arvados-login-sync will not remove
# the account from that group. The default is a set of particularly
# security-sensitive groups across Debian- and Red Hat-based
# distributions.
SyncIgnoredGroups:
- adm
- disk
- kmem
- mem
- root
- shadow
- staff
- sudo
- sys
- utempter
- utmp
- wheel

AuditLogs:
# Time to keep audit logs, in seconds. (An audit log is a row added
# to the "logs" table in the PostgreSQL database each time an
# Arvados object is created, modified, or deleted.)
#
# Currently, websocket event notifications rely on audit logs, so
# this should not be set lower than 300 (5 minutes).
MaxAge: 336h

# Maximum number of log rows to delete in a single SQL transaction.
#
# If MaxDeleteBatch is 0, log entries will never be
# deleted by Arvados. Cleanup can be done by an external process
# without affecting any Arvados system processes, as long as very
# recent (<5 minutes old) logs are not deleted.
#
# 100000 is a reasonable batch size for most sites.
MaxDeleteBatch: 0

# Attributes to suppress in events and audit logs. Notably,
# specifying {"manifest_text": {}} here typically makes the database
# smaller and faster.
#
# Warning: Using any non-empty value here can have undesirable side
# effects for any client or component that relies on event logs.
# Use at your own risk.
UnloggedAttributes: {}

SystemLogs:

# Logging threshold: panic, fatal, error, warn, info, debug, or
# trace
LogLevel: info

# Logging format: json or text
Format: json

# Maximum characters of (JSON-encoded) query parameters to include
# in each request log entry. When params exceed this size, they will
# be JSON-encoded, truncated to this size, and logged as
# params_truncated.
MaxRequestLogParamsSize: 2000

# In all services except RailsAPI, periodically check whether
# the incoming HTTP request queue is nearly full (see
# MaxConcurrentRequests) and, if so, write a snapshot of the
# request queue to {service}-requests.json in the specified
# directory.
#
# Leave blank to disable.
RequestQueueDumpDirectory: ""

Collections:

# Enable access controls for data stored in Keep. This should
# always be set to true on a production cluster.
BlobSigning: true

# BlobSigningKey is a string of alphanumeric characters used to
# generate permission signatures for Keep locators. It must be
# identical to the permission key given to Keep. IMPORTANT: This
# is a site secret. It should be at least 50 characters.
#
# Modifying BlobSigningKey will invalidate all existing
# signatures, which can cause programs to fail (e.g., arv-put,
# arv-get, and Crunch jobs). To avoid errors, rotate keys only
# when no such processes are running.
BlobSigningKey: ""

# Enable garbage collection of unreferenced blobs in Keep.
BlobTrash: true

# Time to leave unreferenced blobs in "trashed" state before
# deleting them, or 0 to skip the "trashed" state entirely and
# delete unreferenced blobs.
#
# If you use any Amazon S3 buckets as storage volumes, this
# must be at least 24h to avoid occasional data loss.
BlobTrashLifetime: 336h

# How often to check for (and delete) trashed blocks whose
# BlobTrashLifetime has expired.
BlobTrashCheckInterval: 24h

# Maximum number of concurrent "trash blob" and "delete trashed
# blob" operations conducted by a single keepstore process. Each
# of these can be set to 0 to disable the respective operation.
#
# If BlobTrashLifetime is zero, "trash" and "delete trash"
# happen at once, so only the lower of these two values is used.
BlobTrashConcurrency: 4
BlobDeleteConcurrency: 4

# Maximum number of concurrent "create additional replica of
# existing blob" operations conducted by a single keepstore
# process.
BlobReplicateConcurrency: 4

# Default replication level for collections. This is used when a
# collection's replication_desired attribute is nil.
DefaultReplication: 2

# BlobSigningTTL determines the minimum lifetime of transient
# data, i.e., blocks that are not referenced by
# collections. Unreferenced blocks exist for two reasons:
#
# 1) A data block must be written to a disk/cloud backend device
# before a collection can be created/updated with a reference to
# it.
#
# 2) Deleting or updating a collection can remove the last
# remaining reference to a data block.
#
# If BlobSigningTTL is too short, long-running
# processes/containers will fail when they take too long (a)
# between writing blocks and writing collections that reference
# them, or (b) between reading collections and reading the
# referenced blocks.
#
# If BlobSigningTTL is too long, data will still be stored long
# after the referring collections are deleted, and you will
# needlessly fill up disks or waste money on cloud storage.
#
# Modifying BlobSigningTTL invalidates existing signatures; see
# BlobSigningKey note above.
#
# The default is 2 weeks.
BlobSigningTTL: 336h

# When running keep-balance, this is the destination filename for
# the list of lost block hashes if there are any, one per line.
# Updated automatically during each successful run.
BlobMissingReport: ""

# keep-balance operates periodically, i.e.: do a
# scan/balance operation, sleep, repeat.
#
# BalancePeriod determines the interval between start times of
# successive scan/balance operations. If a scan/balance operation
# takes longer than BalancePeriod, the next one will follow it
# immediately.
#
# If SIGUSR1 is received during an idle period between operations,
# the next operation will start immediately.
BalancePeriod: 6h

# Limits the number of collections retrieved by keep-balance per
# API transaction. If this is zero, page size is
# determined by the API server's own page size limits (see
# API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
BalanceCollectionBatch: 0

# The size of keep-balance's internal queue of
# collections. Higher values may improve throughput by allowing
# keep-balance to fetch collections from the database while the
# current collections are still being processed, at the expense of
# using more memory. If this is zero or omitted, pages are
# processed serially.
BalanceCollectionBuffers: 4

# Maximum time for a rebalancing run. This ensures keep-balance
# eventually gives up and retries if, for example, a network
# error causes a hung connection that is never closed by the
# OS. It should be long enough that it doesn't interrupt a
# long-running balancing operation.
BalanceTimeout: 6h

# Maximum number of replication_confirmed /
# storage_classes_confirmed updates to write to the database
# after a rebalancing run. When many updates are needed, this
# spreads them over a few runs rather than applying them all at
# once.
BalanceUpdateLimit: 100000

# Maximum number of "pull block from other server" and "trash
# block" requests to send to each keepstore server at a
# time. Smaller values use less memory in keepstore and
# keep-balance. Larger values allow more progress per
# keep-balance iteration. A zero value computes all of the
# needed changes but does not apply any.
BalancePullLimit: 100000
BalanceTrashLimit: 100000

# Default lifetime for ephemeral collections: 2 weeks. This must not
# be less than BlobSigningTTL.
DefaultTrashLifetime: 336h

# Interval (seconds) between trash sweeps. During a trash sweep,
# collections are marked as trash if their trash_at time has
# arrived, and deleted if their delete_at time has arrived.
TrashSweepInterval: 60s

# If true, enable collection versioning.
# When a collection's preserve_version field is true or the current version
# is older than the number of seconds defined in PreserveVersionIfIdle,
# a snapshot of the collection's previous state is created and linked to
# the current collection.
CollectionVersioning: true

# 0s = auto-create a new version on every update.
# -1s = never auto-create new versions.
# > 0s = auto-create a new version when older than the specified number of seconds.
PreserveVersionIfIdle: 10s

# If non-empty, allow project and collection names to contain
# the "/" character (slash/stroke/solidus), and replace "/" with
# the given string in the filesystem hierarchy presented by
# WebDAV. Example values are "%2f" and "{slash}". Names that
# contain the substitution string itself may result in confusing
# behavior, so a value like "_" is not recommended.
#
# If the default empty value is used, the server will reject
# requests to create or rename a collection when the new name
# contains "/".
#
# If the value "/" is used, project and collection names
# containing "/" will be allowed, but they will not be
# accessible via WebDAV.
#
# Use of this feature is not recommended, if it can be avoided.
ForwardSlashNameSubstitution: ""

# Include "folder objects" in S3 ListObjects responses.
S3FolderObjects: true

# Managed collection properties. At creation time, if the client didn't
# provide the listed keys, they will be automatically populated following
# one of the following behaviors:
#
# * UUID of the user who owns the containing project.
# responsible_person_uuid: {Function: original_owner, Protected: true}
#
# * Default concrete value.
# foo_bar: {Value: baz, Protected: false}
#
# If Protected is true, only an admin user can modify its value.
ManagedProperties:
SAMPLE: {Function: original_owner, Protected: true}

# In "trust all content" mode, Workbench will redirect download
# requests to the WebDAV preview link, even in the cases when
# WebDAV would have to expose XSS vulnerabilities in order to
# handle the redirect (see discussion on Services.WebDAV).
#
# This setting has no effect in the recommended configuration, where the
# WebDAV service is configured to have a separate domain for every
# collection and XSS protection is provided by browsers' same-origin
# policy.
#
# The default setting (false) is appropriate for a multi-user site.
TrustAllContent: false

# Cache parameters for WebDAV content serving:
WebDAVCache:
# Time to cache manifests, permission checks, and sessions.
TTL: 300s

# Maximum amount of data cached in /var/cache/arvados/keep.
# Can be given as a percentage of filesystem size ("10%") or a
# number of bytes ("10 GiB")
DiskCacheSize: 10%

# Approximate memory limit (in bytes) for session cache.
#
# Note this applies to the in-memory representation of
# projects and collections -- metadata, block locators,
# filenames, etc. -- not the file data itself (see
# DiskCacheSize).
MaxCollectionBytes: 100 MB

# Persistent sessions.
MaxSessions: 100

# Selectively set permissions for regular users and admins to
# download or upload data files using the upload/download
# features for Workbench, WebDAV and S3 API support.
WebDAVPermission:
User:
Download: true
Upload: true
Admin:
Download: true
Upload: true

# Selectively set permissions for regular users and admins to be
# able to download or upload blocks using arv-put and
# arv-get from outside the cluster.
KeepproxyPermission:
User:
Download: true
Upload: true
Admin:
Download: true
Upload: true

# Post upload / download events to the API server logs table, so
# that they can be included in the arv-user-activity report.
# You can disable this if you find that it is creating excess
# load on the API server and you don't need it.
WebDAVLogEvents: true

# If a client requests partial content past the start of a file,
# and a request from the same client for the same file was logged
# within the past WebDAVLogDownloadInterval, do not write a new log.
# This throttling applies to both printed and API server logs.
# This reduces log output when clients like `aws s3 cp` download
# one file in small chunks in parallel.
# Set this to 0 to disable throttling and log all requests.
WebDAVLogDownloadInterval: 30s

# Per-connection output buffer for WebDAV downloads. May improve
# throughput for large files, particularly when storage volumes
# have high latency.
#
# Size can be specified as a number of bytes ("0") or with units
# ("128KiB", "1 MB").
WebDAVOutputBuffer: 0

Login:
# One of the following mechanisms (Google, PAM, LDAP, or
# LoginCluster) should be enabled; see
# https://doc.arvados.org/install/setup-login.html

Google:
# Authenticate with Google.
Enable: false

# Use the Google Cloud console to enable the People API (APIs
# and Services > Enable APIs and services > Google People API
# > Enable), generate a Client ID and secret (APIs and
# Services > Credentials > Create credentials > OAuth client
# ID > Web application) and add your controller's /login URL
# (e.g., "https://zzzzz.example.com/login") as an authorized
# redirect URL.
ClientID: ""
ClientSecret: ""

# Allow users to log in to existing accounts using any verified
# email address listed by their Google account. If true, the
# Google People API must be enabled in order for Google login to
# work. If false, only the primary email address will be used.
AlternateEmailAddresses: true

# Send additional parameters with authentication requests. See
# https://developers.google.com/identity/protocols/oauth2/openid-connect#authenticationuriparameters
# for a list of supported parameters.
AuthenticationRequestParameters:
# Show the "choose which Google account" page, even if the
# client is currently logged in to exactly one Google
# account.
prompt: select_account

SAMPLE: ""

OpenIDConnect:
# Authenticate with an OpenID Connect provider.
Enable: false

# Issuer URL, e.g., "https://login.example.com".
#
# This must be exactly equal to the URL returned by the issuer
# itself in its config response ("issuer" key). If the
# configured value is "https://example" and the provider
# returns "https://example:443" or "https://example/" then
# login will fail, even though those URLs are equivalent (RFC
# 3986).
Issuer: ""

# Your client ID and client secret (supplied by the provider).
ClientID: ""
ClientSecret: ""

# OpenID claim field containing the user's email
# address. Normally "email"; see
# https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims
EmailClaim: "email"

# OpenID claim field containing the email verification
# flag. Normally "email_verified". To accept every returned
# email address without checking a "verified" field at all,
# use an empty string "".
EmailVerifiedClaim: "email_verified"

# OpenID claim field containing the user's preferred
# username. If empty, use the mailbox part of the user's email
# address.
UsernameClaim: ""

# Send additional parameters with authentication requests,
# like {display: page, prompt: consent}. See
# https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest
# and refer to your provider's documentation for supported
# parameters.
AuthenticationRequestParameters:
SAMPLE: ""

# Accept an OIDC access token as an API token if the OIDC
# provider's UserInfo endpoint accepts it.
#
# AcceptAccessTokenScope should also be used when enabling
# this feature.
AcceptAccessToken: false

# Before accepting an OIDC access token as an API token, first
# check that it is a JWT whose "scope" value includes this
# value. Example: "https://zzzzz.example.com/" (your Arvados
# API endpoint).
#
# If this value is empty and AcceptAccessToken is true, all
# access tokens will be accepted regardless of scope,
# including non-JWT tokens. This is not recommended.
AcceptAccessTokenScope: ""

PAM:
# Use PAM to authenticate users.
Enable: false

# PAM service name. PAM will apply the policy in the
# corresponding config file (e.g., /etc/pam.d/arvados) or, if
# there is none, the default "other" config.
Service: arvados

# Domain name (e.g., "example.com") to use to construct the
# user's email address if PAM authentication returns a
# username with no "@". If empty, use the PAM username as the
# user's email address, whether or not it contains "@".
#
# Note that the email address is used as the primary key for
# user records when logging in. Therefore, if you change
# DefaultEmailDomain after the initial installation, you
# should also update existing user records to reflect the new
# domain. Otherwise, next time those users log in, they will
# be given new accounts instead of accessing their existing
# accounts.
DefaultEmailDomain: ""

LDAP:
# Use an LDAP service to authenticate users.
Enable: false

# Server URL, like "ldap://ldapserver.example.com:389" or
# "ldaps://ldapserver.example.com:636".
URL: "ldap://ldap:389"

# Use StartTLS upon connecting to the server.
StartTLS: true

# Skip TLS certificate name verification.
InsecureTLS: false

# Minimum TLS version to negotiate when connecting to server
# (ldaps://... or StartTLS). It may be necessary to set this
# to "1.1" for compatibility with older LDAP servers that fail
# with 'LDAP Result Code 200 "Network Error": TLS handshake
# failed (tls: server selected unsupported protocol version
# 301)'.
#
# If blank, use the recommended minimum version (1.2).
MinTLSVersion: ""

# Strip the @domain part if a user supplies an email-style
# username with this domain. If "*", strip any user-provided
# domain. If "", never strip the domain part. Example:
# "example.com"
StripDomain: ""

# If, after applying StripDomain, the username contains no "@"
# character, append this domain to form an email-style
# username. Example: "example.com"
AppendDomain: ""

# The LDAP attribute to filter on when looking up a username
# (after applying StripDomain and AppendDomain).
SearchAttribute: uid

# Bind with this username (DN or UPN) and password when
# looking up the user record.
#
# Example user: "cn=admin,dc=example,dc=com"
SearchBindUser: ""
SearchBindPassword: ""

# Directory base for username lookup. Example:
# "ou=Users,dc=example,dc=com"
SearchBase: ""

# Additional filters to apply when looking up users' LDAP
# entries. This can be used to restrict access to a subset of
# LDAP users, or to disambiguate users from other directory
# entries that have the SearchAttribute present.
#
# Special characters in assertion values must be escaped (see
# RFC 4515).
#
# Example: "(objectClass=person)"
SearchFilters: ""

# LDAP attribute to use as the user's email address.
#
# Important: This must not be an attribute whose value can be
# edited in the directory by the users themselves. Otherwise,
# users can take over other users' Arvados accounts trivially
# (email address is the primary key for Arvados accounts.)
EmailAttribute: mail

# LDAP attribute to use as the preferred Arvados username. If
# no value is found (or this config is empty) the username
# originally supplied by the user will be used.
UsernameAttribute: uid

Test:
# Authenticate users listed here in the config file. This
# feature is intended to be used in test environments, and
# should not be used in production.
Enable: false
Users:
SAMPLE:
Email: alice@example.com
Password: xyzzy

# The cluster ID to delegate the user database. When set,
# logins on this cluster will be redirected to the login cluster
# (login cluster must appear in RemoteClusters with Proxy: true)
LoginCluster: ""

# How long a cached token belonging to a remote cluster will
# remain valid before it needs to be revalidated.
RemoteTokenRefresh: 5m

# How long a client token created from a login flow will be valid without
# asking the user to re-login. Example values: 60m, 8h.
# Default value zero means tokens don't have expiration.
TokenLifetime: 0s

# If true (default), tokens are allowed to create new tokens and
# view existing tokens belonging to the same user.
# If false, tokens are not allowed to view or create other
# tokens. New tokens can only be created by going through login
# again.
IssueTrustedTokens: true

# Origins (scheme://host[:port]) of clients trusted to receive
# new tokens via login process. The ExternalURLs of the local
# Workbench1 and Workbench2 are trusted implicitly and do not
# need to be listed here. If this is a LoginCluster, you
# probably want to include the other Workbench instances in the
# federation in this list.
#
# A wildcard like "https://*.example" will match client URLs
# like "https://a.example" and "https://a.b.c.example".
#
# Example:
#
# TrustedClients:
# "https://workbench.other-cluster.example": {}
# "https://workbench2.other-cluster.example": {}
TrustedClients:
SAMPLE: {}

# Treat any origin whose host part is "localhost" or a private
# IP address (e.g., http://10.0.0.123:3000/) as if it were
# listed in TrustedClients.
#
# Intended only for test/development use. Not appropriate for
# production use.
TrustPrivateNetworks: false

TLS:
# Use "file:///var/lib/acme/live/example.com/cert" and
# ".../privkey" to load externally managed certificates.
Certificate: ""
Key: ""

# Accept invalid certificates when connecting to servers. Never
# use this in production.
Insecure: false

ACME:
# Obtain certificates automatically for ExternalURL domains
# using an ACME server and http-01 validation.
#
# To use Let's Encrypt, specify "LE". To use the Let's
# Encrypt staging environment, specify "LE-staging". To use a
# different ACME server, specify the full directory URL
# ("https://...").
#
# Note: this feature is not yet implemented in released
# versions, only in the alpha/prerelease arvados-server-easy
# package.
#
# Implies agreement with the server's terms of service.
Server: ""

Containers:
# List of supported Docker Registry image formats that compute nodes
# are able to use. `arv keep docker` will error out if a user tries
# to store an image with an unsupported format. Use an empty map
# to skip the compatibility check (and display a warning message to
# that effect).
#
# Example for sites running docker < 1.10: {"v1": {}}
# Example for sites running docker >= 1.10: {"v2": {}}
# Example for disabling check: {}
SupportedDockerImageFormats:
"v2": {}
SAMPLE: {}

# Include details about job reuse decisions in the server log. This
# causes additional database queries to run, so it should not be
# enabled unless you expect to examine the resulting logs for
# troubleshooting purposes.
LogReuseDecisions: false

# Default value for keep_cache_ram of a container's
# runtime_constraints. Note: this gets added to the RAM request
# used to allocate a VM or submit an HPC job.
#
# If this is zero, container requests that don't specify RAM or
# disk cache size will use a disk cache, sized to the
# container's RAM requirement (but with minimum 2 GiB and
# maximum 32 GiB).
#
# Note: If you change this value, containers that used the previous
# default value will only be reused by container requests that
# explicitly specify the previous value in their keep_cache_ram
# runtime constraint.
DefaultKeepCacheRAM: 0

# Number of times a container can be unlocked before being
# automatically cancelled.
MaxDispatchAttempts: 10

# Default value for container_count_max for container requests. This is the
# number of times Arvados will create a new container to satisfy a container
# request. If a container is cancelled it will retry a new container if
# container_count < container_count_max on any container requests associated
# with the cancelled container.
MaxRetryAttempts: 3

# Schedule all child containers on preemptible instances (e.g. AWS
# Spot Instances) even if not requested by the submitter.
#
# If false, containers are scheduled on preemptible instances
# only when requested by the submitter.
#
# This flag is ignored if no preemptible instance types are
# configured, and has no effect on top-level containers.
AlwaysUsePreemptibleInstances: false

# Automatically add a preemptible variant for every
# non-preemptible entry in InstanceTypes below. The maximum bid
# price for the preemptible variant will be the non-preemptible
# price multiplied by PreemptiblePriceFactor. If 0, preemptible
# variants are not added automatically.
#
# A price factor of 1.0 is a reasonable starting point.
PreemptiblePriceFactor: 0

# When the lowest-priced instance type for a given container is
# not available, try other instance types, up to the indicated
# maximum price factor.
#
# For example, with MaximumPriceFactor 1.5, if the
# lowest-cost instance type A suitable for a given container
# costs $2/h, Arvados may run the container on any instance type
# B costing $3/h or less when instance type A is not available
# or an idle instance of type B is already running.
MaximumPriceFactor: 1.5

# PEM encoded SSH key (RSA, DSA, ECDSA, or ED25519) used by the
# cloud dispatcher for executing containers on worker VMs.
# Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
# and ends with "\n-----END RSA PRIVATE KEY-----\n".
#
# Use "file:///absolute/path/to/key" to load the key from a
# separate file instead of embedding it in the configuration
# file.
#
# Amazon EC2 only supports RSA and ED25519 keys.
DispatchPrivateKey: ""

# Maximum time to wait for workers to come up before abandoning
# stale locks from a previous dispatch process.
StaleLockTimeout: 1m

# The crunch-run command used to start a container on a worker node.
#
# When dispatching to cloud VMs, this is used only if
# DeployRunnerBinary in the CloudVMs section is set to the empty
# string.
CrunchRunCommand: "crunch-run"

# Extra arguments to add to crunch-run invocation
# Example: ["--cgroup-parent-subsystem=memory"]
CrunchRunArgumentsList: []

# Extra RAM to reserve on the node, in addition to
# the amount specified in the container's RuntimeConstraints
ReserveExtraRAM: 550MiB

# Minimum time between two attempts to run the same container
MinRetryPeriod: 0s

# Container runtime: "docker" (default) or "singularity"
RuntimeEngine: docker

# When running a container, run a dedicated keepstore process,
# using the specified number of 64 MiB memory buffers per
# allocated CPU core (VCPUs in the container's runtime
# constraints). The dedicated keepstore handles I/O for
# collections mounted in the container, as well as saving
# container logs.
#
# A zero value disables this feature.
#
# In order for this feature to be activated, no volume may use
# AccessViaHosts, and no writable volume may have Replication
# lower than Collections.DefaultReplication. If these
# requirements are not satisfied, the feature is disabled
# automatically regardless of the value given here.
#
# When an HPC dispatcher is in use (see SLURM and LSF sections),
# this feature depends on the operator to ensure an up-to-date
# cluster configuration file (/etc/arvados/config.yml) is
# available on all compute nodes. If it is missing or not
# readable by the crunch-run user, the feature will be disabled
# automatically. To read it from a different location, add a
# "-config=/path/to/config.yml" argument to
# CrunchRunArgumentsList above.
#
# When the cloud dispatcher is in use (see CloudVMs section) and
# this configuration is enabled, the entire cluster
# configuration file, including the system root token, is copied
# to the worker node and held in memory for the duration of the
# container.
LocalKeepBlobBuffersPerVCPU: 1

# When running a dedicated keepstore process for a container
# (see LocalKeepBlobBuffersPerVCPU), write keepstore log
# messages to keepstore.txt in the container's log collection.
#
# These log messages can reveal some volume configuration
# details, error messages from the cloud storage provider, etc.,
# which are not otherwise visible to users.
#
# Accepted values:
# * "none" -- no keepstore.txt file
# * "all" -- all logs, including request and response lines
# * "errors" -- all logs except "response" logs with 2xx
# response codes and "request" logs
LocalKeepLogsToContainerLog: none

Logging:
# Container logs are written to Keep and saved in a
# collection, which is updated periodically while the
# container runs. This value sets the interval between
# collection updates.
LogUpdatePeriod: 30m

# The log collection is also updated when the specified amount of
# log data (given in bytes) is produced in less than one update
# period.
LogUpdateSize: 32MiB

ShellAccess:
# An admin user can use "arvados-client shell" to start an
# interactive shell (with any user ID) in any running
# container.
Admin: true

# Any user can use "arvados-client shell" to start an
# interactive shell (with any user ID) in any running
# container that they started, provided it isn't also
# associated with a different user's container request.
#
# Interactive sessions make it easy to alter the container's
# runtime environment in ways that aren't recorded or
# reproducible. Consider the implications for automatic
# container reuse before enabling and using this feature. In
# particular, note that starting an interactive session does
# not disqualify a container from being reused by a different
# user/workflow in the future.
User: false

SLURM:
PrioritySpread: 0

# Arguments to sbatch when submitting Arvados containers as
# SLURM jobs.
#
# Template variables starting with % will be substituted as follows:
#
# %U uuid
# %C number of VCPUs
# %M memory in MiB
# %T tmp in MiB
# %G number of GPU devices (runtime_constraints.gpu.device_count)
# %W maximum run time in minutes
# %P comma separated partitions (scheduling_parameters.partitions)
# %I optimal instance type (if instance types are configured)
# %% a single % character
#
# Any argument containing %P or %W will be omitted if the
# corresponding parameter is empty, e.g., with the default
# configuration, "--partition=%P" will be omitted for
# containers that have no scheduling_parameters.partitions.
#
# Arvados prepends some additional non-configurable sbatch
# arguments, including "--no-requeue" and "--nice=...".
SbatchArgumentsList: ["--job-name=%U", "--mem=%M", "--cpus-per-task=%C", "--tmp=%T", "--partition=%P"]

# Additional arguments to sbatch when submitting containers
# that have runtime_constraints.gpu.device_count > 0
SbatchGPUArgumentsList: ["--gpus=%G"]

SbatchEnvironmentVariables:
SAMPLE: ""

LSF:
# Arguments to bsub when submitting Arvados containers as LSF jobs.
#
# Template variables starting with % will be substituted as follows:
#
# %U uuid
# %C number of VCPUs
# %M memory in MiB
# %T tmp in MiB
# %G number of GPU devices (runtime_constraints.gpu.device_count)
# %W maximum run time in minutes (see MaxRunTimeOverhead and
# MaxRunTimeDefault below)
#
# Use %% to express a literal %. For example, the %%J in the
# default argument list will be changed to %J, which is
# interpreted by bsub itself.
#
# Note that the default arguments cause LSF to write two files
# in /tmp on the compute node each time an Arvados container
# runs. Ensure you have something in place to delete old files
# from /tmp, or adjust the "-o" and "-e" arguments accordingly.
#
# If ["-We", "%W"] or ["-W", "%W"] appear in this argument
# list, and MaxRunTimeDefault is not set (see below), both of
# those arguments will be dropped from the argument list when
# running a container that has no max_run_time value.
BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]", "-We", "%W"]

# Arguments that will be appended to the bsub command line
# when submitting Arvados containers as LSF jobs with
# runtime_constraints.gpu.device_count > 0
BsubGPUArguments: ["-gpu", "num=%G"]

# Use sudo to switch to this user account when submitting LSF
# jobs.
#
# This account must exist on the hosts where LSF jobs run
# ("execution hosts"), as well as on the host where the
# Arvados LSF dispatcher runs ("submission host").
BsubSudoUser: "crunch"

# When passing the scheduling_parameters.max_run_time value
# to LSF via "%W", add this much time to account for
# crunch-run startup/shutdown overhead.
MaxRunTimeOverhead: 5m

# If non-zero, MaxRunTimeDefault is used as the default value
# for max_run_time for containers that do not specify a time
# limit. MaxRunTimeOverhead will be added to this.
#
# Example:
# MaxRunTimeDefault: 2h
MaxRunTimeDefault: 0

CloudVMs:
# Enable the cloud scheduler.
Enable: false

# Name/number of port where workers' SSH services listen.
SSHPort: "22"

# Interval between queue polls.
PollInterval: 10s

# Shell command to execute on each worker to determine whether
# the worker is booted and ready to run containers. It should
# exit zero if the worker is ready.
BootProbeCommand: "systemctl is-system-running"

# Minimum interval between consecutive probes to a single
# worker.
ProbeInterval: 10s

# Maximum probes per second, across all workers in a pool.
MaxProbesPerSecond: 10

# Time before repeating SIGTERM when killing a container.
TimeoutSignal: 5s

# Time to give up on a process (most likely arv-mount) that
# still holds a container lockfile after its main supervisor
# process has exited, and declare the instance broken.
TimeoutStaleRunLock: 5s

# Time to give up on SIGTERM and write off the worker.
TimeoutTERM: 2m

# Maximum create/destroy-instance operations per second (0 =
# unlimited).
MaxCloudOpsPerSecond: 10

# Maximum concurrent instance creation operations (0 = unlimited).
#
# MaxConcurrentInstanceCreateOps limits the number of instance creation
# requests that can be in flight at any one time, whereas
# MaxCloudOpsPerSecond limits the number of create/destroy operations
# that can be started per second.
#
# Because the API for instance creation on Azure is synchronous, it is
# recommended to increase MaxConcurrentInstanceCreateOps when running
# on Azure. When using managed images, a value of 20 would be
# appropriate. When using Azure Shared Image Galleries, it could be set
# higher. For more information, see
# https://docs.microsoft.com/en-us/azure/virtual-machines/linux/capture-image
#
# MaxConcurrentInstanceCreateOps can be increased for other cloud
# providers too, if desired.
MaxConcurrentInstanceCreateOps: 1

# The maximum number of instances to run at a time, or 0 for
# unlimited.
#
# If more instances than this are already running and busy
# when the dispatcher starts up, the running containers will
# be allowed to finish before the excess instances are shut
# down.
MaxInstances: 64

# The minimum number of instances expected to be runnable
# without reaching a provider-imposed quota.
#
# This is used as the initial value for the dispatcher's
# dynamic instance limit, which increases (up to MaxInstances)
# as containers start up successfully and decreases in
# response to high API load and cloud quota errors.
#
# Setting this to 0 means the dynamic instance limit will
# start at MaxInstances.
#
# Situations where you may want to set this (to a value less
# than MaxInstances) would be when there is significant
# variability or uncertainty in the actual cloud resources
# available. Upon reaching InitialQuotaEstimate the
# dispatcher will switch to a more conservative behavior with
# slower instance start to avoid over-shooting cloud resource
# limits.
InitialQuotaEstimate: 0

# Maximum fraction of available instance capacity allowed to
# run "supervisor" containers at any given time. A supervisor
# is a container whose purpose is mainly to submit and manage
# other containers, such as arvados-cwl-runner workflow
# runner.
#
# If there is a hard limit on the amount of concurrent
# containers that the cluster can run, it is important to
# avoid crowding out the containers doing useful work with
# containers that just create more work.
#
# For example, with the default MaxInstances of 64, it will
# schedule at most floor(64*0.50) = 32 concurrent workflow
# runners, ensuring 32 slots are available for work.
SupervisorFraction: 0.50

# Interval between cloud provider syncs/updates ("list all
# instances").
SyncInterval: 1m

# Time to leave an idle worker running (in case new containers
# appear in the queue that it can run) before shutting it
# down.
TimeoutIdle: 1m

# Time to wait for a new worker to boot (i.e., pass
# BootProbeCommand) before giving up and shutting it down.
TimeoutBooting: 10m

# Maximum time a worker can stay alive with no successful
# probes before being automatically shut down.
TimeoutProbe: 10m

# Time after shutting down a worker to retry the
# shutdown/destroy operation.
TimeoutShutdown: 10s

# Worker VM image ID.
# (aws) AMI identifier
# (azure) managed disks: the name of the managed disk image
# (azure) shared image gallery: the name of the image definition. Also
# see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
# (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
# https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
ImageID: ""

# Shell script to run on new instances using the cloud
# provider's UserData (EC2) or CustomData (Azure) feature.
#
# It is not necessary to include a #!/bin/sh line.
InstanceInitCommand: ""

# An executable file (located on the dispatcher host) to be
# copied to cloud instances at runtime and used as the
# "crunch-run" container runner/supervisor. The default value
# is the dispatcher program itself.
#
# Use an empty string to disable this step: nothing will be
# copied, and cloud instances are assumed to have a suitable
# version of crunch-run installed; see CrunchRunCommand above.
DeployRunnerBinary: "/proc/self/exe"

# Directory to store the crunch-run binary on cloud instances
# (see DeployRunnerBinary above). The "mkdir -p" command will
# be used to create the directory and its parents if needed.
DeployRunnerDirectory: /tmp/arvados-crunch-run

# Install the Dispatcher's SSH public key (derived from
# DispatchPrivateKey) when creating new cloud
# instances. Change this to false if you are using a different
# mechanism to pre-install the public key on new instances.
DeployPublicKey: true

# Tags to add on all resources (VMs, NICs, disks) created by
# the container dispatcher. (Arvados's own tags --
# InstanceType, IdleBehavior, and InstanceSecret -- will also
# be added.)
ResourceTags:
SAMPLE: "tag value"

# Prefix for predefined tags used by Arvados (InstanceSetID,
# InstanceType, InstanceSecret, IdleBehavior). With the
# default value "Arvados", tags are "ArvadosInstanceSetID",
# "ArvadosInstanceSecret", etc.
#
# This should only be changed while no cloud resources are in
# use and the cloud dispatcher is not running. Otherwise,
# VMs/resources that were added using the old tag prefix will
# need to be detected and cleaned up manually.
TagKeyPrefix: Arvados

# Cloud driver: "azure" (Microsoft Azure), "ec2" (Amazon AWS),
# or "loopback" (run containers on dispatch host for testing
# purposes).
Driver: ec2

# Cloud-specific driver parameters.
DriverParameters:

# (ec2) Credentials. Omit or leave blank if using IAM role.
AccessKeyID: ""
SecretAccessKey: ""

# (ec2) Instance configuration.

# (ec2) Region, like "us-east-1".
Region: ""

# (ec2) Security group IDs. Omit or use {} to use the
# default security group.
SecurityGroupIDs:
"SAMPLE": {}

# (ec2) One or more subnet IDs. Omit or leave empty to let
# AWS choose a default subnet from your default VPC. If
# multiple subnets are configured here (enclosed in brackets
# like [subnet-abc123, subnet-def456]) the cloud dispatcher
# will detect subnet-related errors and retry using a
# different subnet. Most sites specify one subnet.
SubnetID: ""

EBSVolumeType: gp2

# (ec2) name of the IAMInstanceProfile for instances started by
# the cloud dispatcher. Leave blank when not needed.
IAMInstanceProfile: ""

# (ec2) how often to look up spot instance pricing data
# (only while running spot instances) for the purpose of
# calculating container cost estimates. A value of 0
# disables spot price lookups entirely.
SpotPriceUpdateInterval: 24h

# (ec2) per-GiB-month cost of EBS volumes. Matches
# EBSVolumeType. Used to account for AddedScratch when
# calculating container cost estimates. Note that
# https://aws.amazon.com/ebs/pricing/ defines GB to mean
# GiB, so an advertised price $0.10/GB indicates a real
# price of $0.10/GiB and can be entered here as 0.10.
EBSPrice: 0.10

# (ec2) Mapping of alphabetic instance type prefix to
# instance quota group. Any prefix not listed here will be
# treated as a distinct instance quota group. For example,
# "trn1.2xlarge" will implicitly belong to instance quota
# group "trn".
#
# Knowing that multiple instance types belong to the same
# quota group enables the dispatcher to minimize futile
# attempts to create new instances when a quota has been
# reached.
#
# All keys must be lowercase.
InstanceTypeQuotaGroups:
a: standard
c: standard
d: standard
h: standard
i: standard
m: standard
r: standard
t: standard
z: standard
vt: g
p5: p5

# (azure) Credentials.
SubscriptionID: ""
ClientID: ""
ClientSecret: ""
TenantID: ""

# (azure) Instance configuration.
CloudEnvironment: AzurePublicCloud
Location: centralus

# (azure) The resource group where the VM and virtual NIC will be
# created.
ResourceGroup: ""

# (azure) The resource group of the Network to use for the virtual
# NIC (if different from ResourceGroup)
NetworkResourceGroup: ""
Network: ""
Subnet: ""

# (azure) managed disks: The resource group where the managed disk
# image can be found (if different from ResourceGroup).
ImageResourceGroup: ""

# (azure) shared image gallery: the name of the gallery
SharedImageGalleryName: ""
# (azure) shared image gallery: the version of the image definition
SharedImageGalleryImageVersion: ""

# (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
StorageAccount: ""
BlobContainer: ""

# (azure) How long to wait before deleting VHD and NIC
# objects that are no longer being used.
DeleteDanglingResourcesAfter: 20s

# Account (that already exists in the VM image) that will be
# set up with an ssh authorized key to allow the compute
# dispatcher to connect.
AdminUsername: crunch

InstanceTypes:

# Use the instance type name as the key (in place of "SAMPLE" in
# this sample entry).
SAMPLE:
# Cloud provider's instance type. Defaults to the configured type name.
ProviderType: ""
VCPUs: 1
RAM: 128MiB
IncludedScratch: 16GB
AddedScratch: 0

# Hourly price ($), used to select node types for containers,
# and to calculate estimated container costs. For spot
# instances on EC2, this is also used as the maximum price
# when launching spot instances, while the estimated container
# cost is computed based on the current spot price according
# to AWS. On Azure, and on-demand instances on EC2, the price
# given here is used to compute container cost estimates.
Price: 0.1
Preemptible: false

# Include this section if the instance type includes GPU support
GPU:
# The software stack, currently "cuda" or "rocm"
Stack: "cuda"

# The version of the driver installed on this instance, in
# X.Y format
DriverVersion: "11.0"

# The architecture or capabilities of the GPU hardware
#
# For 'cuda', this is the Compute Capability in X.Y
# format.
#
# For 'rocm', this is the LLVM target (e.g. gfx1100) for
# the GPU hardware.
HardwareTarget: "9.0"

# The number of GPUs on this instance
DeviceCount: 1

# The amount of VRAM per GPU
VRAM: 8000000000

StorageClasses:

# If you use multiple storage classes, specify them here, using
# the storage class name as the key (in place of "SAMPLE" in
# this sample entry).
#
# Further info/examples:
# https://doc.arvados.org/admin/storage-classes.html
SAMPLE:

# Priority determines the order volumes should be searched
# when reading data, in cases where a keepstore server has
# access to multiple volumes with different storage classes.
Priority: 0

# Default determines which storage class(es) should be used
# when a user/client writes data or saves a new collection
# without specifying storage classes.
#
# If any StorageClasses are configured, at least one of them
# must have Default: true.
Default: true

Volumes:
SAMPLE:
# AccessViaHosts specifies which keepstore processes can read
# and write data on the volume.
#
# For a local filesystem, AccessViaHosts has one entry,
# indicating which server the filesystem is located on.
#
# For a network-attached backend accessible by all keepstore
# servers, like a cloud storage bucket or an NFS mount,
# AccessViaHosts can be empty/omitted.
#
# Further info/examples:
# https://doc.arvados.org/install/configure-fs-storage.html
# https://doc.arvados.org/install/configure-s3-object-storage.html
# https://doc.arvados.org/install/configure-azure-blob-storage.html
AccessViaHosts:
SAMPLE:
ReadOnly: false
"http://host1.example:25107": {}
ReadOnly: false
# AllowTrashWhenReadOnly enables unused and overreplicated
# blocks to be trashed/deleted even when ReadOnly is
# true. Normally, this is false and ReadOnly prevents all
# trash/delete operations as well as writes.
AllowTrashWhenReadOnly: false
Replication: 1
StorageClasses:
# If you have configured storage classes (see StorageClasses
# section above), add an entry here for each storage class
# satisfied by this volume.
SAMPLE: true
Driver: S3
DriverParameters:
# for s3 driver -- see
# https://doc.arvados.org/install/configure-s3-object-storage.html
AccessKeyID: aaaaa
SecretAccessKey: aaaaa
Endpoint: ""
Region: us-east-1
Bucket: aaaaa
LocationConstraint: false
V2Signature: false
UsePathStyle: false
IndexPageSize: 1000
ConnectTimeout: 1m
ReadTimeout: 10m
RaceWindow: 24h
PrefixLength: 0

# For S3 driver, potentially unsafe tuning parameter,
# intentionally excluded from main documentation.
#
# Enable deletion (garbage collection) even when the
# configured BlobTrashLifetime is zero. WARNING: eventual
# consistency may result in race conditions that can cause
# data loss. Do not enable this unless you understand and
# accept the risk.
UnsafeDelete: false

# for azure driver -- see
# https://doc.arvados.org/install/configure-azure-blob-storage.html
StorageAccountName: aaaaa
StorageAccountKey: aaaaa
StorageBaseURL: core.windows.net
ContainerName: aaaaa
RequestTimeout: 30s
ListBlobsRetryDelay: 10s
ListBlobsMaxAttempts: 10
MaxGetBytes: 0
WriteRaceInterval: 15s
WriteRacePollTime: 1s

# for local directory driver -- see
# https://doc.arvados.org/install/configure-fs-storage.html
Root: /var/lib/arvados/keep-data

# For local directory driver, potentially confusing tuning
# parameter, intentionally excluded from main documentation.
#
# When true, read and write operations (for whole 64MiB
# blocks) on an individual volume will be queued and issued
# serially. When false, read and write operations will be
# issued concurrently.
#
# May improve throughput if you have physical spinning disks
# and experience contention when there are multiple requests
# to the same volume.
#
# Otherwise, when using SSDs, RAID, or a shared network filesystem, you
# should leave this alone.
Serialize: false

RemoteClusters:
"*":
Host: ""
Proxy: false
Scheme: https
Insecure: false
ActivateUsers: false
SAMPLE:
# API endpoint host or host:port; default is {id}.arvadosapi.com
Host: sample.arvadosapi.com

# Perform a proxy request when a local client requests an
# object belonging to this remote.
Proxy: false

# Default "https". Can be set to "http" for testing.
Scheme: https

# Disable TLS verification. Can be set to true for testing.
Insecure: false

# When users present tokens issued by this remote cluster, and
# their accounts are active on the remote cluster, activate
# them on this cluster too.
ActivateUsers: false

Workbench:
# Workbench1 configs
Theme: default
ActivationContactLink: mailto:info@arvados.org
ArvadosDocsite: https://doc.arvados.org
ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public
ShowUserAgreementInline: false

# Set this configuration to true to avoid providing an easy way for users
# to share data with unauthenticated users; this may be necessary on
# installations where strict data access controls are needed.
DisableSharingURLsUI: false

# Below is a sample setting of the UserProfileFormFields config parameter.
# This configuration parameter should be set to either false (to disable) or
# to a map as shown below.
# Configure the map of input fields to be displayed in the profile page
# using the attribute "key" for each of the input fields.
# This sample shows configuration with one required and one optional form field.
# For each of these input fields:
# You can specify "Type" as "text" or "select".
# List the "Options" to be displayed for each "select" menu.
# Set "Required" to "true" for any of these fields to make them required.
# If any of the required fields are missing in the user's profile, the user will be
# redirected to the profile page before they can access any Workbench features.
UserProfileFormFields:
SAMPLE:
Type: select
FormFieldTitle: Best color
FormFieldDescription: your favorite color
Required: false
Position: 1
Options:
red: {}
blue: {}
green: {}
SAMPLE: {}

# exampleTextValue: # key that will be set in properties
# Type: text #
# FormFieldTitle: ""
# FormFieldDescription: ""
# Required: true
# Position: 1
# exampleOptionsValue:
# Type: select
# FormFieldTitle: ""
# FormFieldDescription: ""
# Required: true
# Position: 1
# Options:
# red: {}
# blue: {}
# yellow: {}

# Use "UserProfileFormMessage" to configure the message you want
# to display on the profile page.
UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'

SiteName: Arvados Workbench

# Workbench2 configs
FileViewersConfigURL: ""

# Idle time after which the user's session will be auto closed.
# This feature is disabled when set to zero.
IdleTimeout: 0s

# UUID of a collection. This collection should be shared with
# all users. Workbench will look for a file "banner.html" in
# this collection and display its contents (should be
# HTML-formatted text) when users first log in to Workbench.
BannerUUID: ""

# Workbench welcome screen. This is HTML text that will be
# incorporated directly onto the page.
WelcomePageHTML: |
<img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
<h2>Please log in.</h2>

<p>If you have never used Arvados Workbench before, logging in
for the first time will automatically create a new
account.</p>

<i>Arvados Workbench uses your information only for
identification, and does not retrieve any other personal
information.</i>

# Workbench screen displayed to inactive users. This is HTML
# text that will be incorporated directly onto the page.
InactivePageHTML: |
<img src="/arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
<h3>Hi! You're logged in, but...</h3>
<p>Your account is inactive.</p>
<p>An administrator must activate your account before you can get
any further.</p>

# Connecting to Arvados shell VMs tends to be site-specific.
# Put any special instructions here. This is HTML text that will
# be incorporated directly onto the Workbench page.
SSHHelpPageHTML: |
<a href="https://doc.arvados.org/user/getting_started/ssh-access-unix.html">Accessing an Arvados VM with SSH</a> (generic instructions).
Site configurations vary. Contact your local cluster administrator if you have difficulty accessing an Arvados shell node.

# Sample text if you are using a "switchyard" ssh proxy.
# Replace "zzzzz" with your Cluster ID.
#SSHHelpPageHTML: |
# <p>Add a section like this to your SSH configuration file ( <i>~/.ssh/config</i>):</p>
# <pre>Host *.zzzzz
# TCPKeepAlive yes
# ServerAliveInterval 60
# ProxyCommand ssh -p2222 turnout@switchyard.zzzzz.arvadosapi.com -x -a $SSH_PROXY_FLAGS %h
# </pre>

# If you are using a switchyard ssh proxy, shell node hostnames
# may require a special hostname suffix. In the sample ssh
# configuration above, this would be ".zzzzz".
# This is added to the hostname in the "command line" column of
# the Workbench "shell VMs" page.
#
# If your shell nodes are directly accessible by users without a
# proxy and have fully qualified host names, you should leave
# this blank.
SSHHelpHostSuffix: ""

# (Experimental) Restart services automatically when config file
# changes are detected. Only supported by `arvados-server boot` in
# dev/test mode.
AutoReloadConfig: false

Previous: InternalURLs and ExternalURL Next: Arvados upgrade notes