#!/usr/bin/python
#
# Copyright 2017 Ingo Hornberger <ingo_@gmx.net>
#
# This software is licensed under the MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be included in all copies
# or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
################################################################################
#
# We want to allow the same features in most every actions. Like download
# links for input files in FROM, ADD, COPY, ...
# If an archive is found, which contains only one file, it will be recursively
# extracted.
#
# Or support several archive formats, like *.img, *.a, *.tar, *.tar.gz
#
# This library is implementing those generic access functions.
#
# Note that we are trying to be as compatible as necessary (not as possible ;) )
# to the Dockerfile format. But we will try to avoid the faults, docker did.
#
# So this is a list of planned deviations from Dockerfile:
#
# Issue 1)
# - ADD supports URLs and archives. But docker doesn't support these two in
#   combination. We will do.
#
# - ADD is able to extract an archive. But you can't just extract a part of
#   it.
#
# => These two limitations are leading people to not use ADD, but use curl + tar
#    + s.th. else inside of the docker container instead. Which is bad
#    practice.
#
# Solution 1)
#
# - Archive extract and URLs can be combined
# - Every URL can have "options", which are separated by a pipe. These options
#   are containing the files, partitions, etc. which the user wants to extract.
#   e.g.:
#     ADD http://mypage.com/files/myrootfs.tar|tar=./etc/fstab|tar=./etc/passwd /
#
# It's keen to want everything to use the same infrastructure and therefore
# support the same features. But anyway we have to maintain the following
# differences between API functions...
#
# Issue 2)
# - FROM extracts everything, except tar* archives
# - ADD extracts everything, except tar* archives (as tar archives are extracted
#   while handling the output, not while handling the input)
# - COPY extracts nothing
# - Anyway, all of them support downloads
#
# Solution 2)
# - we add a parameter to input_file, which specifies an optional parameter to
#   enable extracting of everything supported, except tar archives.
#   Note, that we can only support extraction of archives, that contain only one
#   file (at least after applying its options). Otherwise we don't get a simple
#   data flow.
#
# Issue 3)
# - When extracting over a few indirections, we can't predict which format will
#   come out in the end. So it is nearly impossible to define a good name for
#   the build target. But, as the name of the build target also has to contain
#   the exact file format, we will have to specify the output format in such
#   cases.
#
# Solution 3)
# - The output format is one of the options, which has to be passed to the
#   command when we want to trigger the whole extraction chain.
#   e.g.:
#     FROM https://downloads.raspberrypi.org/raspbian_lite_latest|img=p1|format=tar
#
#   In this example, the exact chain is:
#     raspbian_lite_latest -> *.zip -> *.img -> *.tar
#
#   The image rule has the effect, that partition 1 is packed into a tar archive.
#   This is always the rule, to "extract" an image. If you want, you can then
#   continue extracting this to other formats.
#
################################################################################
import os
import urllib
import binascii
from subprocess import PIPE, Popen

try:
    # Python 3 location of urlopen.
    from urllib.request import urlopen
except ImportError:
    # Python 2 fallback.
    from urllib import urlopen

from lib import makefile as make

# File extensions that input_file() hands over to extract().
archive_formats = ["zip", "a", "img"]


def split_escape(string, delimiter):
    """Split *string* on *delimiter*, honouring backslash escapes.

    A backslash protects the following character from being treated as a
    delimiter. The backslash itself is NOT removed from the yielded pieces.
    A lone trailing backslash is dropped from the last piece.

    Args:
        string: The text to split.
        delimiter: A single-character separator.

    Yields:
        The pieces of *string* between unescaped delimiters (always at
        least one piece, possibly empty).

    Raises:
        ValueError: If *delimiter* is not exactly one character long.
    """
    if len(delimiter) != 1:
        raise ValueError('Invalid delimiter: ' + delimiter)
    ln = len(string)
    i = 0  # start of the piece currently being collected
    j = 0  # scan position
    while j < ln:
        if string[j] == '\\':
            if j + 1 >= ln:
                # Dangling escape at the very end: emit what we have
                # without the backslash and stop.
                yield string[i:j]
                return
            # Skip the escaped character so it is never seen as delimiter.
            j += 1
        elif string[j] == delimiter:
            yield string[i:j]
            i = j + 1
        j += 1
    yield string[i:j]


def execute(command):
    """Run *command* through the shell and return its captured stdout.

    Args:
        command: The shell command line to run.

    Returns:
        Whatever the process wrote to standard output.
    """
    process = Popen(
        args=command,
        stdout=PIPE,
        shell=True
    )
    return process.communicate()[0]


def _crc_name(text, ext):
    """Return the upper-case hex CRC32 of *text* followed by *ext*.

    Text is encoded to bytes first, because binascii.crc32 only accepts
    bytes on Python 3; byte strings are passed through unchanged.
    """
    data = text if isinstance(text, bytes) else text.encode('utf-8')
    return format(0xFFFFFFFF & binascii.crc32(data), '02X') + ext


def download(url):
    """Register a make target which downloads *url* with wget.

    The URL is opened once while generating the makefile, only to follow
    redirections, so that the final file extension is known. The download
    itself happens later, when the generated target is built.

    Args:
        url: The http/https/ftp URL to fetch.

    Returns:
        The name of the make target (CRC32 of the resolved URL plus the
        extension of the resolved URL).
    """
    # check redirection to get the correct file extension
    fileh = urlopen(url)
    try:
        redir = fileh.geturl()
    finally:
        # The handle is only needed for the redirect lookup.
        fileh.close()
    if redir:
        url = redir
    ext = os.path.splitext(url)[1]
    name = _crc_name(url, ext)
    cmd = "wget --continue --output-document=$@ '%s'" % (url)
    makeTarget = {
        'comment': "download %s" % (url),
        'name': name,
        'dep': '',
        'simplecmd': cmd
    }
    make.makeTargets.append(makeTarget)
    return name


def extract(url, options):
    """Register a make target which runs the extraction chain on *url*.

    The generated recipe repeatedly unpacks the current file (zip via
    unzip, ar archives via ar, disk images via kpartx/mount/tar) until a
    file with the requested output format is reached, which is then moved
    to the target. zip and ar archives must contain exactly one member
    after applying their options; otherwise the recipe fails.

    Args:
        url: Name of the input file / make target to extract from.
        options: Dict of options parsed by input_file(). "format" names
            the wanted output extension and is mandatory; "zip", "a" and
            "img" are passed to the respective extraction step.

    Returns:
        The name of the new make target, or *url* unchanged if no
        "format" option was given (an error is printed in that case).
    """
    if "format" not in options:
        print ("error: format not specified for url, which should be extracted")
        print ("url: %s" % url)
        return url
    ext = options["format"]
    name = _crc_name(url, "." + ext)

    o_zip = options.get("zip", "")
    o_a = options.get("a", "")
    o_img = options.get("img", "")
    # NOTE: "$$" is make's escape for a literal "$" that reaches the shell;
    # the whole script is a single recipe line joined by backslashes, so
    # "exit 1" aborts the recipe and lets make report the failure.
    cmd = '''filename="$<"; \\
format="%s"; \\
output="$@"; \\
while true; do \\
    extension="$${filename##*.}"; \\
    if [ "x$${extension}" = "x$${format}" ]; then \\
        echo "info: extraction chain successful, output format found."; \\
        mv $${filename} $${output}; \\
        break; \\
    fi; \\
    case $${extension} in \\
    zip) \\
        options="%s"; \\
        n=$$(zipinfo -1 $${filename} $${options}| wc -l); \\
        if [ "$${n}" = "1" ]; then \\
            f=$$(zipinfo -1 $${filename} $${options}); \\
            unzip -p $${filename} $${options} > $${f}; \\
            filename=$${f}; \\
        else \\
            echo "error: $${filename} has to contain exactly one matching file, found $${n}"; \\
            exit 1; \\
        fi; \\
        ;; \\
    a) \\
        options="%s"; \\
        n=$$(ar t $${filename} $${options}| wc -l); \\
        if [ "$${n}" = "1" ]; then \\
            f=$$(ar t $${filename} $${options}); \\
            ar x $${filename} $${options}; \\
            filename=$${f}; \\
        else \\
            echo "error: $${filename} has to contain exactly one matching file, found $${n}"; \\
            exit 1; \\
        fi; \\
        ;; \\
    img) \\
        options="%s"; \\
        d=$$(basename $$(mktemp -d)); \\
        l=$$(sudo kpartx -l $${filename} | sed -n '/loop/ s,.*/dev/\\(loop[0-9]\\+\\).*,\\1, p;q;'); \\
        echo sed -n '/loop/ s,.*/dev/\\\\(loop[0-9]\\\\+\\\\).*,\\\\1, pq;'; \\
        if [ "x$${d}" != "x" -a "x$${l}" != "x" ]; then \\
            f="$${filename}.tar"; \\
            sudo kpartx -as $${filename}; \\
            sudo mount /dev/mapper/$${l}$${options} /tmp/$${d}; \\
            (cd /tmp/$${d}; sudo tar -cf - .) > $${f}; \\
            sudo umount /tmp/$${d}; \\
            sudo kpartx -d $${filename}; \\
            rmdir /tmp/$${d}; \\
            filename=$${f}; \\
        else \\
            echo "internal error: kpartx returned false or problem with tempdir occured"; \\
            exit 1; \\
        fi; \\
        ;; \\
    *) \\
        echo "error: stopping extracting on $${filename} with ext $${extension}"; \\
        exit 1; \\
        ;; \\
    esac; \\
done''' % (ext, o_zip, o_a, o_img)
    makeTarget = {
        'comment': "extract %s (options: %s)" % (url, options),
        'name': name,
        'dep': [url],
        'simplecmd': cmd
    }
    make.makeTargets.append(makeTarget)
    return name


def input_file(url):
    """Resolve an input specification to the name of a file / make target.

    The specification has the form "<url>|key=value|key=value...". A pipe
    can be escaped with a backslash. Values of repeated keys are joined
    with a single space.

    Remote URLs (http, https, ftp) are turned into download targets. If
    the resulting name has one of the extensions in archive_formats, an
    extraction target is registered as well.

    Args:
        url: The input specification.

    Returns:
        The name of the file or make target that provides the input.
    """
    parts = list(split_escape(url, "|"))
    # Strip both ends, so that trailing blanks before the first pipe do
    # not end up in the file extension.
    url = parts[0].strip()
    options = dict()
    for part in parts[1:]:
        key, _, value = part.partition("=")
        if key in options:
            options[key] += " " + value
        else:
            options[key] = value

    # need to download?
    if url.startswith(("http://", "https://", "ftp://")):
        name = download(url)
    else:
        name = url

    # Extract files if it is an archive.
    ext = os.path.splitext(name)[1][1:]
    if ext in archive_formats:
        name = extract(name, options)

    return name