Package arvados :: Package commands :: Module migrate19
[hide private]
[frames] | no frames]

Source Code for Module arvados.commands.migrate19

  1  # Copyright (C) The Arvados Authors. All rights reserved. 
  2  # 
  3  # SPDX-License-Identifier: Apache-2.0 
  4   
  5  from __future__ import print_function 
  6  from __future__ import division 
  7  import argparse 
  8  import time 
  9  import sys 
 10  import logging 
 11  import shutil 
 12  import tempfile 
 13  import os 
 14  import subprocess 
 15  import re 
 16   
 17  import arvados 
 18  import arvados.commands.keepdocker 
 19  from arvados._version import __version__ 
 20  from arvados.collection import CollectionReader 
 21   
 22  logger = logging.getLogger('arvados.migrate-docker19') 
 23  logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG') 
 24                  else logging.INFO) 
 25   
 26  _migration_link_class = 'docker_image_migration' 
 27  _migration_link_name = 'migrate_1.9_1.10' 
 28   
class MigrationFailed(Exception):
    """Raised when the migration container exits with a non-zero status.

    main() catches it per image; because stdout/stderr have already been
    logged at that point, the traceback is suppressed for this exception type.
    """
31
def _to_text(data):
    """Return *data* as text, decoding bytes as UTF-8 with undecodable bytes replaced."""
    # subprocess returns bytes on Python 3; the regexes below are text patterns.
    if isinstance(data, bytes):
        return data.decode("utf-8", "replace")
    return data


def _tempdir_free_space(path):
    """Return (mount point, available bytes) that `df -B1` reports for *path*."""
    df_out = _to_text(subprocess.check_output(["df", "-B1", path]))
    # Line 0 is the df header; line 1 describes the filesystem holding *path*.
    ln = df_out.splitlines()[1]
    fields = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups()
    # Columns: filesystem, blocks, used, available, use%, mounted-on.
    return fields[5], int(fields[3])


def _reported_bytes(pattern, text):
    """Return the integer captured by group 1 of *pattern* in *text*, or None if absent."""
    found = re.search(pattern, text)
    return int(found.group(1)) if found else None


def _log_space_usage(out):
    """Log the disk-space checkpoints found in the migration container's stdout."""
    initial = _reported_bytes(r"Initial available space is (\d+)", out)
    after_load = _reported_bytes(r"Available space after image load is (\d+)", out)
    after_upgrade = _reported_bytes(r"Available space after image upgrade is (\d+)", out)
    after_upload = _reported_bytes(r"Available space after arv-keepdocker is (\d+)", out)
    after_cleanup = _reported_bytes(r"Available space after cleanup is (\d+)", out)

    # "Used" figures are differences against the initial value, so they can
    # only be reported when the initial value itself was printed.
    if initial is not None:
        logger.info("Available space initially: %i MiB", initial / (2**20))
        if after_load is not None:
            logger.debug("Used after load: %i MiB", (initial - after_load) / (2**20))
        if after_upgrade is not None:
            logger.debug("Used after upgrade: %i MiB", (initial - after_upgrade) / (2**20))
        if after_upload is not None:
            logger.info("Used after upload: %i MiB", (initial - after_upload) / (2**20))

    if after_cleanup is not None:
        logger.debug("Available after cleanup: %i MiB", after_cleanup / (2**20))


def main(arguments=None):
    """Docker image format migration tool for Arvados.

    This converts Docker images stored in Arvados from image format v1
    (Docker <= 1.9) to image format v2 (Docker >= 1.10).

    Requires Docker running on the local host.

    Usage:

    1) Run arvados/docker/migrate-docker19/build.sh to create
    arvados/migrate-docker19 Docker image.

    2) Set ARVADOS_API_HOST and ARVADOS_API_TOKEN to the cluster you want to migrate.

    3) Run arv-migrate-docker19 from the Arvados Python SDK on the host (not in a container).

    This will query Arvados for v1 format Docker images.  For each image that
    does not already have a corresponding v2 format image (as indicated by a
    docker_image_migration tag) it will perform the following process:

    i) download the image from Arvados
    ii) load it into Docker
    iii) update the Docker version, which updates the image
    iv) save the v2 format image and upload to Arvados
    v) create a migration link

    Args:
        arguments: list of command-line argument strings; None lets argparse
            read sys.argv[1:].

    Raises:
        Exception: if the current API token does not belong to an admin user.
        SystemExit: with status 1 if the temp filesystem is too small for the
            biggest image and --force was not given.
    """

    migrate19_parser = argparse.ArgumentParser()
    migrate19_parser.add_argument(
        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.')
    migrate19_parser.add_argument(
        '--verbose', action="store_true", help="Print stdout/stderr even on success")
    migrate19_parser.add_argument(
        '--force', action="store_true", help="Try to migrate even if there isn't enough space")

    migrate19_parser.add_argument(
        '--storage-driver', type=str, default="overlay",
        help="Docker storage driver, e.g. aufs, overlay, vfs")

    exgroup = migrate19_parser.add_mutually_exclusive_group()
    exgroup.add_argument(
        '--dry-run', action='store_true', help="Print number of pending migrations.")
    exgroup.add_argument(
        '--print-unmigrated', action='store_true',
        default=False, help="Print list of images needing migration.")

    migrate19_parser.add_argument('--tempdir', help="Set temporary directory")

    migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                                  default=None, help="List of images to be migrated")

    args = migrate19_parser.parse_args(arguments)

    if args.tempdir:
        tempfile.tempdir = args.tempdir

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    # Optional allow-list of portable data hashes, one per line of infile.
    only_migrate = None
    if args.infile:
        only_migrate = {line.strip() for line in args.infile}

    api_client = arvados.api()

    user = api_client.users().current().execute()
    if not user['is_admin']:
        raise Exception("This command requires an admin token")
    # Migration links are owned by the uuid built from the current user's
    # 12-character uuid prefix followed by fifteen zeros.
    sys_uuid = user['uuid'][:12] + '000000000000000'

    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3)

    # Images whose docker hash starts with "sha256:" are skipped; the rest are
    # treated as old-format images.
    old_images = [img for _, img in images
                  if not img["dockerhash"].startswith("sha256:")]

    migration_links = arvados.util.list_all(api_client.links().list, filters=[
        ['link_class', '=', _migration_link_class],
        ['name', '=', _migration_link_name],
    ])

    already_migrated = set()
    for m in migration_links:
        already_migrated.add(m["tail_uuid"])

    items = arvados.util.list_all(api_client.collections().list,
                                  filters=[["uuid", "in", [img["collection"] for img in old_images]]],
                                  select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
    uuid_to_collection = {i["uuid"]: i for i in items}

    need_migrate = {}
    totalbytes = 0
    biggest = 0
    biggest_pdh = None
    for img in old_images:
        i = uuid_to_collection[img["collection"]]
        pdh = i["portable_data_hash"]
        if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
            need_migrate[pdh] = img
            with CollectionReader(i["manifest_text"]) as c:
                size = list(c.values())[0].size()
                if size > biggest:
                    biggest = size
                    biggest_pdh = pdh
                totalbytes += size

    # Scratch-space estimate as a multiple of the biggest image: 20x for the
    # vfs storage driver, 2.5x for every other driver.
    if args.storage_driver == "vfs":
        will_need = (biggest*20)
    else:
        will_need = (biggest*2.5)

    if args.print_unmigrated:
        for pdh in need_migrate:
            print(pdh)
        return

    logger.info("Already migrated %i images", len(already_migrated))
    logger.info("Need to migrate %i images", len(need_migrate))
    logger.info("Using tempdir %s", tempfile.gettempdir())
    logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
    logger.info("Total data to migrate about %i MiB", totalbytes>>20)

    mounted, available = _tempdir_free_space(tempfile.gettempdir())
    if available <= will_need:
        logger.warning("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, available>>20, int(will_need)>>20)
        if not args.force:
            sys.exit(1)
        else:
            logger.warning("--force provided, will migrate anyway")

    if args.dry_run:
        return

    success = []
    failures = []
    count = 1
    for old_image in list(need_migrate.values()):
        collection = uuid_to_collection[old_image["collection"]]
        if collection["portable_data_hash"] in already_migrated:
            continue

        oldcol = CollectionReader(collection["manifest_text"])
        tarfile = list(oldcol.keys())[0]

        logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
                    old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
        count += 1
        start = time.time()

        # Fresh host directories bind-mounted into the container below; both
        # are removed again in the finally clause.
        varlibdocker = tempfile.mkdtemp()
        dockercache = tempfile.mkdtemp()
        try:
            # Text mode: the default binary mode rejects str writes on Python 3.
            with tempfile.NamedTemporaryFile(mode="w") as envfile:
                envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
                envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
                if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
                # Make sure docker sees the full contents when it opens the file by name.
                envfile.flush()

                dockercmd = ["docker", "run",
                             "--privileged",
                             "--rm",
                             "--env-file", envfile.name,
                             "--volume", "%s:/var/lib/docker" % varlibdocker,
                             "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
                             "arvados/migrate-docker19:1.0",
                             "/root/migrate.sh",
                             "%s/%s" % (old_image["collection"], tarfile),
                             tarfile[0:40],
                             old_image["repo"],
                             old_image["tag"],
                             collection["owner_uuid"],
                             args.storage_driver]

                proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = proc.communicate()

            out = _to_text(out)
            err = _to_text(err)

            _log_space_usage(out)

            if proc.returncode != 0:
                logger.error("Failed with return code %i", proc.returncode)
                logger.error("--- Stdout ---\n%s", out)
                logger.error("--- Stderr ---\n%s", err)
                raise MigrationFailed()

            if args.verbose:
                logger.info("--- Stdout ---\n%s", out)
                logger.info("--- Stderr ---\n%s", err)

            migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
            if migrated:
                newcol = CollectionReader(migrated.group(1))

                # Record old PDH -> new PDH so later runs skip this image.
                api_client.links().create(body={"link": {
                    'owner_uuid': sys_uuid,
                    'link_class': _migration_link_class,
                    'name': _migration_link_name,
                    'tail_uuid': oldcol.portable_data_hash(),
                    'head_uuid': newcol.portable_data_hash()
                    }}).execute(num_retries=3)

                logger.info("Migrated '%s' (%s) to '%s' (%s) in %is",
                            oldcol.portable_data_hash(), old_image["collection"],
                            newcol.portable_data_hash(), migrated.group(1),
                            time.time() - start)
                already_migrated.add(oldcol.portable_data_hash())
                success.append(old_image["collection"])
            else:
                logger.error("Error migrating '%s'", old_image["collection"])
                failures.append(old_image["collection"])
        except Exception as e:
            # One failed image must not stop the remaining migrations.  The
            # traceback is omitted for MigrationFailed because the container
            # output was already logged above.
            logger.error("Failed to migrate %s in %is", old_image["collection"], time.time() - start,
                         exc_info=(not isinstance(e, MigrationFailed)))
            failures.append(old_image["collection"])
        finally:
            shutil.rmtree(varlibdocker)
            shutil.rmtree(dockercache)

    logger.info("Successfully migrated %i images", len(success))
    if failures:
        logger.error("Failed to migrate %i images", len(failures))
288