arvados.commands.migrate19

  1# Copyright (C) The Arvados Authors. All rights reserved.
  2#
  3# SPDX-License-Identifier: Apache-2.0
  4
  5from __future__ import print_function
  6from __future__ import division
  7import argparse
  8import time
  9import sys
 10import logging
 11import shutil
 12import tempfile
 13import os
 14import subprocess
 15import re
 16
 17import arvados
 18import arvados.commands.keepdocker
 19from arvados._version import __version__
 20from arvados.collection import CollectionReader
 21from .. import util
 22
 23logger = logging.getLogger('arvados.migrate-docker19')
 24logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
 25                else logging.INFO)
 26
 27_migration_link_class = 'docker_image_migration'
 28_migration_link_name = 'migrate_1.9_1.10'
 29
 30class MigrationFailed(Exception):
 31    pass
 32
 33@util._deprecated('3.0')
 34def main(arguments=None):
 35    """Docker image format migration tool for Arvados.
 36
 37    This converts Docker images stored in Arvados from image format v1
 38    (Docker <= 1.9) to image format v2 (Docker >= 1.10).
 39
 40    Requires Docker running on the local host.
 41
 42    Usage:
 43
 44    1) Run arvados/docker/migrate-docker19/build.sh to create
 45    arvados/migrate-docker19 Docker image.
 46
 47    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.
 48
 49    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).
 50
 51    This will query Arvados for v1 format Docker images.  For each image that
 52    does not already have a corresponding v2 format image (as indicated by a
 53    docker_image_migration tag) it will perform the following process:
 54
 55    i) download the image from Arvados
 56    ii) load it into Docker
 57    iii) update the Docker version, which updates the image
 58    iv) save the v2 format image and upload to Arvados
 59    v) create a migration link
 60
 61    """
 62
 63    migrate19_parser = argparse.ArgumentParser()
 64    migrate19_parser.add_argument(
 65        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
 66        help='Print version and exit.')
 67    migrate19_parser.add_argument(
 68        '--verbose', action="store_true", help="Print stdout/stderr even on success")
 69    migrate19_parser.add_argument(
 70        '--force', action="store_true", help="Try to migrate even if there isn't enough space")
 71
 72    migrate19_parser.add_argument(
 73        '--storage-driver', type=str, default="overlay",
 74        help="Docker storage driver, e.g. aufs, overlay, vfs")
 75
 76    exgroup = migrate19_parser.add_mutually_exclusive_group()
 77    exgroup.add_argument(
 78        '--dry-run', action='store_true', help="Print number of pending migrations.")
 79    exgroup.add_argument(
 80        '--print-unmigrated', action='store_true',
 81        default=False, help="Print list of images needing migration.")
 82
 83    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")
 84
 85    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
 86                                  default=None, help="List of images to be migrated")
 87
 88    args = migrate19_parser.parse_args(arguments)
 89
 90    if args.tempdir:
 91        tempfile.tempdir = args.tempdir
 92
 93    if args.verbose:
 94        logger.setLevel(logging.DEBUG)
 95
 96    only_migrate = None
 97    if args.infile:
 98        only_migrate = set()
 99        for l in args.infile:
100            only_migrate.add(l.strip())
101
102    api_client  = arvados.api()
103
104    user = api_client.users().current().execute()
105    if not user['is_admin']:
106        raise Exception("This command requires an admin token")
107    sys_uuid = user['uuid'][:12] + '000000000000000'
108
109    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)
110
111    is_new = lambda img: img['dockerhash'].startswith('sha256:')
112
113    count_new = 0
114    old_images = []
115    for uuid, img in images:
116        if img["dockerhash"].startswith("sha256:"):
117            continue
118        key = (img["repo"], img["tag"], img["timestamp"])
119        old_images.append(img)
120
121    migration_links = arvados.util.list_all(api_client.links().list, filters=[
122        ['link_class', '=', _migration_link_class],
123        ['name', '=', _migration_link_name],
124    ])
125
126    already_migrated = set()
127    for m in migration_links:
128        already_migrated.add(m["tail_uuid"])
129
130    items = arvados.util.list_all(api_client.collections().list,
131                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
132                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
133    uuid_to_collection = {i["uuid"]: i for i in items}
134
135    need_migrate = {}
136    totalbytes = 0
137    biggest = 0
138    biggest_pdh = None
139    for img in old_images:
140        i = uuid_to_collection[img["collection"]]
141        pdh = i["portable_data_hash"]
142        if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
143            need_migrate[pdh] = img
144            with CollectionReader(i["manifest_text"]) as c:
145                size = list(c.values())[0].size()
146                if size > biggest:
147                    biggest = size
148                    biggest_pdh = pdh
149                totalbytes += size
150
151
152    if args.storage_driver == "vfs":
153        will_need = (biggest*20)
154    else:
155        will_need = (biggest*2.5)
156
157    if args.print_unmigrated:
158        only_migrate = set()
159        for pdh in need_migrate:
160            print(pdh)
161        return
162
163    logger.info("Already migrated %i images", len(already_migrated))
164    logger.info("Need to migrate %i images", len(need_migrate))
165    logger.info("Using tempdir %s", tempfile.gettempdir())
166    logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
167    logger.info("Total data to migrate about %i MiB", totalbytes>>20)
168
169    df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
170    ln = df_out.splitlines()[1]
171    filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
172    if int(available) <= will_need:
173        logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)>>20, int(will_need)>>20)
174        if not args.force:
175            exit(1)
176        else:
177            logger.warn("--force provided, will migrate anyway")
178
179    if args.dry_run:
180        return
181
182    success = []
183    failures = []
184    count = 1
185    for old_image in list(need_migrate.values()):
186        if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
187            continue
188
189        oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
190        tarfile = list(oldcol.keys())[0]
191
192        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
193                    old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
194        count += 1
195        start = time.time()
196
197        varlibdocker = tempfile.mkdtemp()
198        dockercache = tempfile.mkdtemp()
199        try:
200            with tempfile.NamedTemporaryFile() as envfile:
201                envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
202                envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
203                if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
204                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
205                envfile.flush()
206
207                dockercmd = ["docker", "run",
208                             "--privileged",
209                             "--rm",
210                             "--env-file", envfile.name,
211                             "--volume", "%s:/var/lib/docker" % varlibdocker,
212                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
213                             "arvados/migrate-docker19:1.0",
214                             "/root/migrate.sh",
215                             "%s/%s" % (old_image["collection"], tarfile),
216                             tarfile[0:40],
217                             old_image["repo"],
218                             old_image["tag"],
219                             uuid_to_collection[old_image["collection"]]["owner_uuid"],
220                             args.storage_driver]
221
222                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
223                out, err = proc.communicate()
224
225                initial_space = re.search(r"Initial available space is (\d+)", out)
226                imgload_space = re.search(r"Available space after image load is (\d+)", out)
227                imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
228                keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
229                cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)
230
231                if initial_space:
232                    isp = int(initial_space.group(1))
233                    logger.info("Available space initially: %i MiB", (isp)/(2**20))
234                    if imgload_space:
235                        sp = int(imgload_space.group(1))
236                        logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
237                    if imgupgrade_space:
238                        sp = int(imgupgrade_space.group(1))
239                        logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
240                    if keepdocker_space:
241                        sp = int(keepdocker_space.group(1))
242                        logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))
243
244                if cleanup_space:
245                    sp = int(cleanup_space.group(1))
246                    logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))
247
248                if proc.returncode != 0:
249                    logger.error("Failed with return code %i", proc.returncode)
250                    logger.error("--- Stdout ---\n%s", out)
251                    logger.error("--- Stderr ---\n%s", err)
252                    raise MigrationFailed()
253
254                if args.verbose:
255                    logger.info("--- Stdout ---\n%s", out)
256                    logger.info("--- Stderr ---\n%s", err)
257
258            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
259            if migrated:
260                newcol = CollectionReader(migrated.group(1))
261
262                api_client.links().create(body={"link": {
263                    'owner_uuid': sys_uuid,
264                    'link_class': _migration_link_class,
265                    'name': _migration_link_name,
266                    'tail_uuid': oldcol.portable_data_hash(),
267                    'head_uuid': newcol.portable_data_hash()
268                    }}).execute(num_retries=3)
269
270                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
271                            oldcol.portable_data_hash(), old_image["collection"],
272                            newcol.portable_data_hash(), migrated.group(1),
273                            time.time() - start)
274                already_migrated.add(oldcol.portable_data_hash())
275                success.append(old_image["collection"])
276            else:
277                logger.error("Error migrating '%s'", old_image["collection"])
278                failures.append(old_image["collection"])
279        except Exception as e:
280            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
281                         exc_info=(not isinstance(e, MigrationFailed)))
282            failures.append(old_image["collection"])
283        finally:
284            shutil.rmtree(varlibdocker)
285            shutil.rmtree(dockercache)
286
287    logger.info("Successfully migrated %i images", len(success))
288    if failures:
289        logger.error("Failed to migrate %i images", len(failures))
logger = <Logger arvados.migrate-docker19 (INFO)>
class MigrationFailed(builtins.Exception):
class MigrationFailed(Exception):
    """Raised by main() when the migration container exits with a non-zero status."""

Common base class for all non-exit exceptions.

Inherited Members
builtins.Exception
Exception
builtins.BaseException
with_traceback
args
@util._deprecated('3.0')
def main(arguments=None):
 34@util._deprecated('3.0')
 35def main(arguments=None):
 36    """Docker image format migration tool for Arvados.
 37
 38    This converts Docker images stored in Arvados from image format v1
 39    (Docker <= 1.9) to image format v2 (Docker >= 1.10).
 40
 41    Requires Docker running on the local host.
 42
 43    Usage:
 44
 45    1) Run arvados/docker/migrate-docker19/build.sh to create
 46    arvados/migrate-docker19 Docker image.
 47
 48    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.
 49
 50    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).
 51
 52    This will query Arvados for v1 format Docker images.  For each image that
 53    does not already have a corresponding v2 format image (as indicated by a
 54    docker_image_migration tag) it will perform the following process:
 55
 56    i) download the image from Arvados
 57    ii) load it into Docker
 58    iii) update the Docker version, which updates the image
 59    iv) save the v2 format image and upload to Arvados
 60    v) create a migration link
 61
 62    """
 63
 64    migrate19_parser = argparse.ArgumentParser()
 65    migrate19_parser.add_argument(
 66        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
 67        help='Print version and exit.')
 68    migrate19_parser.add_argument(
 69        '--verbose', action="store_true", help="Print stdout/stderr even on success")
 70    migrate19_parser.add_argument(
 71        '--force', action="store_true", help="Try to migrate even if there isn't enough space")
 72
 73    migrate19_parser.add_argument(
 74        '--storage-driver', type=str, default="overlay",
 75        help="Docker storage driver, e.g. aufs, overlay, vfs")
 76
 77    exgroup = migrate19_parser.add_mutually_exclusive_group()
 78    exgroup.add_argument(
 79        '--dry-run', action='store_true', help="Print number of pending migrations.")
 80    exgroup.add_argument(
 81        '--print-unmigrated', action='store_true',
 82        default=False, help="Print list of images needing migration.")
 83
 84    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")
 85
 86    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
 87                                  default=None, help="List of images to be migrated")
 88
 89    args = migrate19_parser.parse_args(arguments)
 90
 91    if args.tempdir:
 92        tempfile.tempdir = args.tempdir
 93
 94    if args.verbose:
 95        logger.setLevel(logging.DEBUG)
 96
 97    only_migrate = None
 98    if args.infile:
 99        only_migrate = set()
100        for l in args.infile:
101            only_migrate.add(l.strip())
102
103    api_client  = arvados.api()
104
105    user = api_client.users().current().execute()
106    if not user['is_admin']:
107        raise Exception("This command requires an admin token")
108    sys_uuid = user['uuid'][:12] + '000000000000000'
109
110    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)
111
112    is_new = lambda img: img['dockerhash'].startswith('sha256:')
113
114    count_new = 0
115    old_images = []
116    for uuid, img in images:
117        if img["dockerhash"].startswith("sha256:"):
118            continue
119        key = (img["repo"], img["tag"], img["timestamp"])
120        old_images.append(img)
121
122    migration_links = arvados.util.list_all(api_client.links().list, filters=[
123        ['link_class', '=', _migration_link_class],
124        ['name', '=', _migration_link_name],
125    ])
126
127    already_migrated = set()
128    for m in migration_links:
129        already_migrated.add(m["tail_uuid"])
130
131    items = arvados.util.list_all(api_client.collections().list,
132                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
133                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
134    uuid_to_collection = {i["uuid"]: i for i in items}
135
136    need_migrate = {}
137    totalbytes = 0
138    biggest = 0
139    biggest_pdh = None
140    for img in old_images:
141        i = uuid_to_collection[img["collection"]]
142        pdh = i["portable_data_hash"]
143        if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
144            need_migrate[pdh] = img
145            with CollectionReader(i["manifest_text"]) as c:
146                size = list(c.values())[0].size()
147                if size > biggest:
148                    biggest = size
149                    biggest_pdh = pdh
150                totalbytes += size
151
152
153    if args.storage_driver == "vfs":
154        will_need = (biggest*20)
155    else:
156        will_need = (biggest*2.5)
157
158    if args.print_unmigrated:
159        only_migrate = set()
160        for pdh in need_migrate:
161            print(pdh)
162        return
163
164    logger.info("Already migrated %i images", len(already_migrated))
165    logger.info("Need to migrate %i images", len(need_migrate))
166    logger.info("Using tempdir %s", tempfile.gettempdir())
167    logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
168    logger.info("Total data to migrate about %i MiB", totalbytes>>20)
169
170    df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
171    ln = df_out.splitlines()[1]
172    filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
173    if int(available) <= will_need:
174        logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)>>20, int(will_need)>>20)
175        if not args.force:
176            exit(1)
177        else:
178            logger.warn("--force provided, will migrate anyway")
179
180    if args.dry_run:
181        return
182
183    success = []
184    failures = []
185    count = 1
186    for old_image in list(need_migrate.values()):
187        if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
188            continue
189
190        oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
191        tarfile = list(oldcol.keys())[0]
192
193        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
194                    old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
195        count += 1
196        start = time.time()
197
198        varlibdocker = tempfile.mkdtemp()
199        dockercache = tempfile.mkdtemp()
200        try:
201            with tempfile.NamedTemporaryFile() as envfile:
202                envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
203                envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
204                if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
205                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
206                envfile.flush()
207
208                dockercmd = ["docker", "run",
209                             "--privileged",
210                             "--rm",
211                             "--env-file", envfile.name,
212                             "--volume", "%s:/var/lib/docker" % varlibdocker,
213                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
214                             "arvados/migrate-docker19:1.0",
215                             "/root/migrate.sh",
216                             "%s/%s" % (old_image["collection"], tarfile),
217                             tarfile[0:40],
218                             old_image["repo"],
219                             old_image["tag"],
220                             uuid_to_collection[old_image["collection"]]["owner_uuid"],
221                             args.storage_driver]
222
223                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
224                out, err = proc.communicate()
225
226                initial_space = re.search(r"Initial available space is (\d+)", out)
227                imgload_space = re.search(r"Available space after image load is (\d+)", out)
228                imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
229                keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
230                cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)
231
232                if initial_space:
233                    isp = int(initial_space.group(1))
234                    logger.info("Available space initially: %i MiB", (isp)/(2**20))
235                    if imgload_space:
236                        sp = int(imgload_space.group(1))
237                        logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
238                    if imgupgrade_space:
239                        sp = int(imgupgrade_space.group(1))
240                        logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
241                    if keepdocker_space:
242                        sp = int(keepdocker_space.group(1))
243                        logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))
244
245                if cleanup_space:
246                    sp = int(cleanup_space.group(1))
247                    logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))
248
249                if proc.returncode != 0:
250                    logger.error("Failed with return code %i", proc.returncode)
251                    logger.error("--- Stdout ---\n%s", out)
252                    logger.error("--- Stderr ---\n%s", err)
253                    raise MigrationFailed()
254
255                if args.verbose:
256                    logger.info("--- Stdout ---\n%s", out)
257                    logger.info("--- Stderr ---\n%s", err)
258
259            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
260            if migrated:
261                newcol = CollectionReader(migrated.group(1))
262
263                api_client.links().create(body={"link": {
264                    'owner_uuid': sys_uuid,
265                    'link_class': _migration_link_class,
266                    'name': _migration_link_name,
267                    'tail_uuid': oldcol.portable_data_hash(),
268                    'head_uuid': newcol.portable_data_hash()
269                    }}).execute(num_retries=3)
270
271                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
272                            oldcol.portable_data_hash(), old_image["collection"],
273                            newcol.portable_data_hash(), migrated.group(1),
274                            time.time() - start)
275                already_migrated.add(oldcol.portable_data_hash())
276                success.append(old_image["collection"])
277            else:
278                logger.error("Error migrating '%s'", old_image["collection"])
279                failures.append(old_image["collection"])
280        except Exception as e:
281            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
282                         exc_info=(not isinstance(e, MigrationFailed)))
283            failures.append(old_image["collection"])
284        finally:
285            shutil.rmtree(varlibdocker)
286            shutil.rmtree(dockercache)
287
288    logger.info("Successfully migrated %i images", len(success))
289    if failures:
290        logger.error("Failed to migrate %i images", len(failures))

Docker image format migration tool for Arvados.

This converts Docker images stored in Arvados from image format v1 (Docker <= 1.9) to image format v2 (Docker >= 1.10).

Requires Docker running on the local host.

Usage:

1) Run arvados/docker/migrate-docker19/build.sh to create arvados/migrate-docker19 Docker image.

2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

This will query Arvados for v1 format Docker images. For each image that does not already have a corresponding v2 format image (as indicated by a docker_image_migration link), it will perform the following process:

i) download the image from Arvados

ii) load it into Docker

iii) update the Docker version, which updates the image

iv) save the v2 format image and upload to Arvados

v) create a migration link