Expand source code
def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
    """Command-line entry point: fetch a manifest, a file, or a file tree.

    The locator given on the command line is split at its first '/'.
    The part before it identifies the collection (or block); the part
    from the '/' onward, if any, selects a file or subcollection inside it.

    * With no path part (and no ``-r``), the work is delegated to
      ``write_block_or_manifest`` and the result goes to the destination
      (``"-"`` means ``stdout``).
    * Otherwise the collection is opened, the list of files to fetch is
      built, and each file is streamed to its local destination.

    Args:
        arguments: Passed through to ``parse_arguments``.
        stdout: Stream used when the destination is ``"-"``. If it is
            ``sys.stdout`` and has a ``buffer`` attribute, the binary
            buffer is used instead so raw bytes can be written.
        stderr: Stream that receives progress lines and hash digests.

    Returns:
        0 on success (also when the download loop is stopped by
        KeyboardInterrupt), 1 when an error was logged.

    Raises:
        arvados.errors.ArgumentError: re-raised from the manifest path
            unless its message mentions 'Argument to CollectionReader'.
    """
    global api_client

    if stdout is sys.stdout and hasattr(stdout, 'buffer'):
        # in Python 3, write to stdout as binary
        stdout = stdout.buffer

    args = parse_arguments(arguments, stdout, stderr)
    # Each -v lowers the threshold by one logging level (10 units).
    logger.setLevel(logging.WARNING - 10 * args.v)

    request_id = arvados.util.new_request_id()
    logger.info('X-Request-Id: '+request_id)

    # Create the module-level client on first use; reuse it afterwards.
    if api_client is None:
        api_client = arvados.api('v1', request_id=request_id)

    # Group 1: everything before the first '/'. Group 2: the rest
    # (including the leading '/'), or None when there is no '/'.
    r = re.search(r'^(.*?)(/.*)?$', args.locator)
    col_loc = r.group(1)
    get_prefix = r.group(2)
    if args.r and not get_prefix:
        # Recursive download with no path means "the whole collection".
        get_prefix = os.sep

    # User asked to download the collection's manifest
    if not get_prefix:
        if not args.n:
            open_flags = os.O_CREAT | os.O_WRONLY
            if not args.f:
                # Without -f, refuse to overwrite an existing file.
                open_flags |= os.O_EXCL
            try:
                if args.destination == "-":
                    write_block_or_manifest(
                        dest=stdout, src=col_loc,
                        api_client=api_client, args=args)
                else:
                    out_fd = os.open(args.destination, open_flags)
                    with os.fdopen(out_fd, 'wb') as out_file:
                        write_block_or_manifest(
                            dest=out_file, src=col_loc,
                            api_client=api_client, args=args)
            except (IOError, OSError) as error:
                logger.error("can't write to '{}': {}".format(args.destination, error))
                return 1
            except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
                logger.error("failed to download '{}': {}".format(col_loc, error))
                return 1
            except arvados.errors.ArgumentError as error:
                if 'Argument to CollectionReader' in str(error):
                    logger.error("error reading collection: {}".format(error))
                    return 1
                else:
                    raise
        return 0

    try:
        reader = arvados.CollectionReader(
            col_loc, api_client=api_client, num_retries=args.retries)
    except Exception as error:
        logger.error("failed to read collection: {}".format(error))
        return 1

    # Scan the collection. Make an array of (stream, file, local
    # destination filename) tuples, and add up total size to extract.
    todo = []
    todo_bytes = 0
    try:
        if get_prefix == os.sep:
            item = reader
        else:
            item = reader.find('.' + get_prefix)

        if isinstance(item, (arvados.collection.Subcollection,
                             arvados.collection.CollectionReader)):
            # If the user asked for a file and we got a subcollection, error out.
            if get_prefix[-1] != os.sep:
                logger.error("requested file '{}' is in fact a subcollection. Append a trailing '/' to download it.".format('.' + get_prefix))
                return 1
            # If the user asked stdout as a destination, error out.
            elif args.destination == '-':
                logger.error("cannot use 'stdout' as destination when downloading multiple files.")
                return 1
            # User asked for a subcollection, and that's what was found. Add up total size
            # to download.
            for s, f in files_in_collection(item):
                # Drop the requested prefix from the in-collection path so
                # files land relative to the destination directory.
                dest_path = os.path.join(
                    args.destination,
                    os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
                if (not (args.n or args.f or args.skip_existing) and
                        os.path.exists(dest_path)):
                    logger.error('Local file %s already exists.' % (dest_path,))
                    return 1
                todo.append((s, f, dest_path))
                todo_bytes += f.size()
        elif isinstance(item, arvados.arvfile.ArvadosFile):
            todo.append((item.parent, item, args.destination))
            todo_bytes += item.size()
        else:
            logger.error("'{}' not found.".format('.' + get_prefix))
            return 1
    except (IOError, arvados.errors.NotFoundError) as e:
        logger.error(e)
        return 1

    out_bytes = 0
    for s, f, outfilename in todo:
        outfile = None
        digestor = None
        if not args.n:
            if outfilename == "-":
                outfile = stdout
            else:
                if args.skip_existing and os.path.exists(outfilename):
                    logger.debug('Local file %s exists. Skipping.', outfilename)
                    continue
                elif not args.f and (os.path.isfile(outfilename) or
                                     os.path.isdir(outfilename)):
                    # Good thing we looked again: apparently this file wasn't
                    # here yet when we checked earlier.
                    logger.error('Local file %s already exists.' % (outfilename,))
                    return 1
                if args.r:
                    arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
                try:
                    outfile = open(outfilename, 'wb')
                except Exception as error:
                    logger.error('Open(%s) failed: %s' % (outfilename, error))
                    return 1
        try:
            # Build the digest inside the try block so that an error from
            # hashlib.new() still closes the output file in `finally`.
            if args.hash:
                digestor = hashlib.new(args.hash)
            with s.open(f.name, 'rb') as file_reader:
                for data in file_reader.readall():
                    if outfile:
                        outfile.write(data)
                    if digestor:
                        digestor.update(data)
                    out_bytes += len(data)
                    if args.progress:
                        stderr.write('\r%d MiB / %d MiB %.1f%%' %
                                     (out_bytes >> 20,
                                      todo_bytes >> 20,
                                      (100
                                       if todo_bytes == 0
                                       else 100.0*out_bytes/todo_bytes)))
                    elif args.batch_progress:
                        # Four values need four conversions; with only three
                        # the % operator raises TypeError.
                        stderr.write('%s %d read %d total %d\n' %
                                     (sys.argv[0], os.getpid(),
                                      out_bytes, todo_bytes))
            if digestor:
                stderr.write("%s %s/%s\n"
                             % (digestor.hexdigest(), s.stream_name(), f.name))
        except KeyboardInterrupt:
            # Remove the partial file, but only for descriptors above the
            # three standard streams (0, 1, 2).
            if outfile and (outfile.fileno() > 2) and not outfile.closed:
                os.unlink(outfile.name)
            break
        finally:
            # Never close the caller-supplied stdout stream.
            if outfile is not None and outfile is not stdout:
                outfile.close()

    if args.progress:
        # Finish the '\r'-rewritten progress line.
        stderr.write('\n')
    return 0