arvados.commands.migrate19
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import print_function
from __future__ import division
import argparse
import time
import sys
import logging
import shutil
import tempfile
import os
import subprocess
import re

import arvados
import arvados.commands.keepdocker
from arvados._version import __version__
from arvados.collection import CollectionReader
from .. import util

logger = logging.getLogger('arvados.migrate-docker19')
logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
                else logging.INFO)

_migration_link_class = 'docker_image_migration'
_migration_link_name = 'migrate_1.9_1.10'

class MigrationFailed(Exception):
    pass

@util._deprecated('3.0')
def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag) it will perform the following process:

    i) download the image from Arvados
    ii) load it into Docker
    iii) update the Docker version, which updates the image
    iv) save the v2 format image and upload to Arvados
    v) create a migration link

    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')
    migrate19_parser.add_argument(
        '--verbose', action="store_true", help="Print stdout/stderr even on success")
    migrate19_parser.add_argument(
        '--force', action="store_true", help="Try to migrate even if there isn't enough space")

    migrate19_parser.add_argument(
        '--storage-driver', type=str, default="overlay",
        help="Docker storage driver, e.g. aufs, overlay, vfs")

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    if args.tempdir:
        tempfile.tempdir = args.tempdir

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    only_migrate = None
    if args.infile:
        only_migrate = set()
        for l in args.infile:
            only_migrate.add(l.strip())

    api_client = arvados.api()

    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
    sys_uuid = user['uuid'][:12] + '000000000000000'

    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

    is_new = lambda img: img['dockerhash'].startswith('sha256:')

    count_new = 0
    old_images = []
    for uuid, img in images:
        if img["dockerhash"].startswith("sha256:"):
            continue
        key = (img["repo"], img["tag"], img["timestamp"])
        old_images.append(img)

    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    already_migrated = set()
    for m in migration_links:
        already_migrated.add(m["tail_uuid"])

    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
    uuid_to_collection = {i["uuid"]: i for i in items}

    need_migrate = {}
    totalbytes = 0
    biggest = 0
    biggest_pdh = None
    for img in old_images:
        i = uuid_to_collection[img["collection"]]
        pdh = i["portable_data_hash"]
        if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
            need_migrate[pdh] = img
            with CollectionReader(i["manifest_text"]) as c:
                size = list(c.values())[0].size()
                if size > biggest:
                    biggest = size
                    biggest_pdh = pdh
                totalbytes += size


    if args.storage_driver == "vfs":
        will_need = (biggest*20)
    else:
        will_need = (biggest*2.5)

    if args.print_unmigrated:
        only_migrate = set()
        for pdh in need_migrate:
            print(pdh)
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))
    logger.info("Using tempdir %s", tempfile.gettempdir())
    logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
    logger.info("Total data to migrate about %i MiB", totalbytes>>20)

    df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
    ln = df_out.splitlines()[1]
    filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
    if int(available) <= will_need:
        logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)>>20, int(will_need)>>20)
        if not args.force:
            exit(1)
        else:
            logger.warn("--force provided, will migrate anyway")

    if args.dry_run:
        return

    success = []
    failures = []
    count = 1
    for old_image in list(need_migrate.values()):
        if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
            continue

        oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
        tarfile = list(oldcol.keys())[0]

        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
                    old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
        count += 1
        start = time.time()

        varlibdocker = tempfile.mkdtemp()
        dockercache = tempfile.mkdtemp()
        try:
            with tempfile.NamedTemporaryFile() as envfile:
                envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
                envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
                if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
                envfile.flush()

                dockercmd = ["docker", "run",
                             "--privileged",
                             "--rm",
                             "--env-file", envfile.name,
                             "--volume", "%s:/var/lib/docker" % varlibdocker,
                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
                             "arvados/migrate-docker19:1.0",
                             "/root/migrate.sh",
                             "%s/%s" % (old_image["collection"], tarfile),
                             tarfile[0:40],
                             old_image["repo"],
                             old_image["tag"],
                             uuid_to_collection[old_image["collection"]]["owner_uuid"],
                             args.storage_driver]

                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = proc.communicate()

                initial_space = re.search(r"Initial available space is (\d+)", out)
                imgload_space = re.search(r"Available space after image load is (\d+)", out)
                imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
                keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
                cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)

                if initial_space:
                    isp = int(initial_space.group(1))
                    logger.info("Available space initially: %i MiB", (isp)/(2**20))
                if imgload_space:
                    sp = int(imgload_space.group(1))
                    logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
                if imgupgrade_space:
                    sp = int(imgupgrade_space.group(1))
                    logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
                if keepdocker_space:
                    sp = int(keepdocker_space.group(1))
                    logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))

                if cleanup_space:
                    sp = int(cleanup_space.group(1))
                    logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))

                if proc.returncode != 0:
                    logger.error("Failed with return code %i", proc.returncode)
                    logger.error("--- Stdout ---\n%s", out)
                    logger.error("--- Stderr ---\n%s", err)
                    raise MigrationFailed()

                if args.verbose:
                    logger.info("--- Stdout ---\n%s", out)
                    logger.info("--- Stderr ---\n%s", err)

                migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
                if migrated:
                    newcol = CollectionReader(migrated.group(1))

                    api_client.links().create(body={"link": {
                        'owner_uuid': sys_uuid,
                        'link_class': _migration_link_class,
                        'name': _migration_link_name,
                        'tail_uuid': oldcol.portable_data_hash(),
                        'head_uuid': newcol.portable_data_hash()
                        }}).execute(num_retries=3)

                    logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                                oldcol.portable_data_hash(), old_image["collection"],
                                newcol.portable_data_hash(), migrated.group(1),
                                time.time() - start)
                    already_migrated.add(oldcol.portable_data_hash())
                    success.append(old_image["collection"])
                else:
                    logger.error("Error migrating '%s'", old_image["collection"])
                    failures.append(old_image["collection"])
        except Exception as e:
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)
            shutil.rmtree(dockercache)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))
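The bookkeeping that drives main() is an Arvados link record per completed migration: link_class 'docker_image_migration', name 'migrate_1.9_1.10', with tail_uuid set to the portable data hash of the original v1 image collection and head_uuid set to the migrated v2 collection. The following is a minimal sketch (not part of the module) showing how the same SDK calls the tool uses can list those records, for example to audit which images have already been migrated:

import arvados
import arvados.util

# List the migration records created by this tool. The filter values match
# the module constants _migration_link_class and _migration_link_name above.
api = arvados.api()
links = arvados.util.list_all(api.links().list, filters=[
    ['link_class', '=', 'docker_image_migration'],
    ['name', '=', 'migrate_1.9_1.10'],
])

# tail_uuid: portable data hash of the original (v1) image collection.
# head_uuid: portable data hash of its migrated (v2) counterpart.
migrated = {l['tail_uuid']: l['head_uuid'] for l in links}
print("%d images already migrated" % len(migrated))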
class MigrationFailed(Exception)
Raised when the migration container for an image exits with a nonzero status. The failure is logged, the image is recorded in the failure count, and processing continues with the next image.
@util._deprecated('3.0')
def main(arguments=None)
Docker image format migration tool for Arvados.
This converts Docker images stored in Arvados from image format v1 (Docker <= 1.9) to image format v2 (Docker >= 1.10).
Requires Docker running on the local host.
Usage:
1) Run arvados/docker/migrate-docker19/build.sh to create the arvados/migrate-docker19 Docker image.
2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.
3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).
This will query Arvados for v1 format Docker images. For each image that does not already have a corresponding v2 format image (as indicated by a docker_image_migration tag) it will perform the following process:
i) download the image from Arvados
ii) load it into Docker
iii) update the Docker version, which updates the image
iv) save the v2 format image and upload to Arvados
v) create a migration link
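Because main() accepts an explicit argument list, these checks can also be driven from Python rather than the arv-migrate-docker19 command. A minimal sketch, assuming ARVADOS_API_HOST and ARVADOS_API_TOKEN point at the target cluster with an admin token, and keeping in mind that this entry point is deprecated as of SDK 3.0:

from arvados.commands.migrate19 import main

# Report how many images are already migrated and how many still need
# migration, without changing anything.
main(['--dry-run'])

# Print the portable data hash of each unmigrated v1 image, one per line.
main(['--print-unmigrated'])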