Wharfie

a modern, docker-like, embedded image builder

Brought to you by: ingo
MIT License
[r60]: / trunk / wharfie / lib / files.py Maximize Restore History

259 lines (246 with data), 8.7 kB

#!/usr/bin/python
#
# Copyright 2017 Ingo Hornberger <ingo_@gmx.net>
#
# This software is licensed under the MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be included in all copies
# or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
################################################################################
#
# We want to allow the same features in almost every action. Like download
# links for input files in FROM, ADD, COPY, ...
# If an archive is found, which contains only one file, it will be recursively
# extracted.
#
# Or support several archive formats, like *.img, *.a, *.tar, *.tar.gz
#
# This library is implementing those generic access functions.
#
# Note that we are trying to be as compatible as necessary (not as possible ;) )
# to the Dockerfile format. But we will try to avoid the faults, docker did.
#
# So this is a list of planned deviations from Dockerfile:
#
# Issue 1)
# - ADD supports URLs and archives. But docker doesn't support these two in
#   combination. We will do.
#
# - ADD is able to extract an archive. But you can't just extract a part of
#   it.
#
# => These two limitations are leading people to not use ADD, but use curl + tar
#    + s.th. else inside of the docker container instead. Which is bad
#    practice.
#
# Solution 1)
#
# - Archive extract and URLs can be combined
# - Every URL can have "options", which are separated by a pipe. These options
#   contain the files, partitions, etc. which the user wants to extract.
#   e.g.:
#   ADD http://mypage.com/files/myrootfs.tar|tar=./etc/fstab|tar=./etc/passwd /
#
# It's keen to want everything to use the same infrastructure and therefore
# support the same features. But anyway we have to maintain the following
# differences between API functions...
#
# Issue 2)
# - FROM extracts everything, except tar* archives
# - ADD extracts everything, except tar* archives (as tar archives are extracted
#   while handling the output, not while handling the input)
# - COPY extracts nothing
# - Anyway, all of them support downloads
#
# Solution 2)
# - we add a parameter to input_file, which specifies an optional parameter to
#   enable extracting of everything supported, except tar archives.
# Note, that we can only support extraction of archives, that contain only one
# file (at least after applying its options). Otherwise we don't get a simple
# data flow.
#
# Issue 3)
# - When extracting over a few indirections, we can't predict which format will
#   come out in the end. So it is nearly impossible to define a good name for
#   the build target. But, as the name of the build target also has to contain
#   the exact file format, we will have to specify the output format in such
#   cases.
#
# Solution 3)
# - The output format is one of the options, which has to be passed to the
#   command when we want to trigger the whole extraction chain.
#   e.g.:
#   FROM https://downloads.raspberrypi.org/raspbian_lite_latest|img=p1|format=tar
#
#   In this example, the exact chain is:
#   raspbian_lite_latest -> *.zip -> *.img -> *.tar
#
#   The image rule has the effect, that partition 1 is packed into a tar archive.
#   This is always the rule, to "extract" an image. If you want, you can then
#   continue extracting this to other formats.
#
################################################################################
import os
import urllib
import binascii
from lib import makefile as make

archive_formats = ["zip", "a", "img"]

def split_escape(string, delimiter):
    """Yield the pieces of *string* separated by unescaped *delimiter*.

    A backslash protects the character that follows it from being treated
    as a delimiter.  The backslash itself is NOT removed from the yielded
    piece.  A lone backslash at the very end of *string* terminates the
    scan and is dropped from the last piece.

    Raises ValueError (on first iteration, as this is a generator) when
    *delimiter* is not exactly one character long.
    """
    if len(delimiter) != 1:
        raise ValueError('Invalid delimiter: ' + delimiter)
    total = len(string)
    start = 0
    pos = 0
    while pos < total:
        char = string[pos]
        if char == '\\':
            if pos + 1 >= total:
                # dangling escape: emit what we have, without the backslash
                yield string[start:pos]
                return
            # skip the backslash and the character it escapes
            pos += 2
            continue
        if char == delimiter:
            yield string[start:pos]
            start = pos + 1
        pos += 1
    yield string[start:pos]

def execute(command):
    """Run *command* through the shell and return what it wrote to stdout.

    *command* is handed to the shell unmodified (shell=True), so it must
    only be built from trusted input.  stderr is not captured.  The call
    blocks until the process has finished.
    """
    # Imported here so that the function does not depend on names which the
    # module's import block does not provide (Popen/PIPE were unresolved).
    from subprocess import PIPE, Popen

    process = Popen(
        args=command,
        stdout=PIPE,
        shell=True
    )
    # communicate() returns (stdout, stderr); only stdout is piped.
    return process.communicate()[0]

    
def download(url):
    """Register a make target which downloads *url*; return the target name.

    The URL is opened once to follow redirections, so that the file
    extension is taken from the final location.  The target name is the
    CRC32 of that final URL (upper-case hex) followed by the extension.
    Nothing is downloaded here: a wget command is appended to
    make.makeTargets and executed later by make.
    """
    # check redirection to get the correct file extension
    fileh = urllib.urlopen(url)
    try:
        redir = fileh.geturl()
    finally:
        # only the final URL is needed, don't leak the open connection
        fileh.close()
    if redir != '':
        url = redir
    ext = os.path.splitext(url)[1]
    name = format(0xFFFFFFFF & binascii.crc32(url), '02X') + ext
    # "$@" is expanded by make to the target name
    cmd = "wget --continue --output-document=$@ '%s'" % (url)
    makeTarget = {
        'comment' : "download %s" % (url),
        'name': name,
        'dep': '',
        'simplecmd': cmd
    }
    make.makeTargets.append(makeTarget)
    return name

def extract(url, options):
    """Register a make target which runs the extraction chain on *url*.

    url     -- name of the archive file; it becomes the dependency of the
               generated target.
    options -- dict of options.  "format" is mandatory and names the file
               extension at which the chain stops.  "zip", "a" and "img"
               are optional and are passed to the extraction step of the
               respective archive type (member names, partition suffix).

    Returns the name of the generated target: the CRC32 of *url* in
    upper-case hex plus "." and the requested format.  If "format" is
    missing, an error is printed and *url* is returned unchanged; no target
    is registered in that case.

    The recipe is a shell loop which looks at the extension of the current
    file and unpacks it one level per iteration until the extension equals
    the requested format.  "$$" is the make escape for a literal "$".
    """
    if "format" not in options:
        print ("error: format not specified for url, which should be extracted")
        print ("url: %s" % url)
        return url
    ext = options["format"]
    name = format(0xFFFFFFFF & binascii.crc32(url), '02X') + "." + ext

    o_zip = options.get("zip", "")
    o_a = options.get("a", "")
    o_img = options.get("img", "")
    # Notes on the recipe below:
    # - the stop format is the requested one, so that it always matches the
    #   extension of the target name built above.
    # - zipinfo/ar take the archive first and the member names afterwards.
    # - every command substitution is written "$$(...)"; a single "$(...)"
    #   would be expanded by make as a variable instead of by the shell.
    # - an archive which does not yield exactly one file stops the chain
    #   with an error; otherwise the loop would never terminate, as the
    #   filename would not change.
    cmd='''filename="$<"; \\
format="%s"; \\
output="$@"; \\
while true; do \\
	extension="$${filename##*.}"; \\
	if [ "x$${extension}" = "x$${format}" ]; then \\
		echo "info: extraction chain successful, output format found."; \\
		mv $${filename} $${output}; \\
		break; \\
	fi; \\
	case $${extension} in \\
		zip) \\
		    options="%s"; \\
		    n=$$(zipinfo -1 $${filename} $${options}| wc -l); \\
		    if [ "$${n}" = "1" ]; then \\
			f=$$(zipinfo -1 $${filename} $${options}); \\
			unzip -p $${filename} $${options} > $${f}; \\
			filename=$${f}; \\
		    else \\
			echo "error: expected exactly one file in $${filename}, but found $${n}"; \\
			break; \\
		    fi; \\
		    ;; \\
		a) \\
		    options="%s"; \\
		    n=$$(ar t $${filename} $${options}| wc -l); \\
		    if [ "$${n}" = "1" ]; then \\
			f=$$(ar t $${filename} $${options}); \\
			ar x $${filename} $${options}; \\
			filename=$${f}; \\
		    else \\
			echo "error: expected exactly one file in $${filename}, but found $${n}"; \\
			break; \\
		    fi; \\
		    ;; \\
		img) \\
		    options="%s"; \\
		    d=$$(basename $$(mktemp -d)); \\
		    l=$$(sudo kpartx -l $${filename} | sed -n '/loop/ s,.*/dev/\\(loop[0-9]\\+\\).*,\\1, p;q;'); \\
		    echo sed -n '/loop/ s,.*/dev/\\\\(loop[0-9]\\\\+\\\\).*,\\\\1, pq;'; \\
		    if [ "x$${d}" != "x" -a "x$${l}" != "x" ]; then \\
			f="$${filename}.tar"; \\
			sudo kpartx -as $${filename}; \\
			sudo mount /dev/mapper/$${l}$${options} /tmp/$${d}; \\
			(cd /tmp/$${d}; sudo tar -cf - .) > $${f}; \\
			sudo umount /tmp/$${d}; \\
			sudo kpartx -d $${filename}; \\
			rmdir /tmp/$${d}; \\
			filename=$${f}; \\
		    else \\
		        echo "internal error: kpartx returned false or problem with tempdir occured"; \\
		        break; \\
		    fi; \\
		    ;; \\
		*) \\
		    echo "error: stopping extracting on $${filename} with ext $${extension}"; \\
		    break; \\
		    ;; \\
	esac; \\
done''' % (ext, o_zip, o_a, o_img)
    makeTarget = {
        'comment' : "extract %s (options: %s)" % (url, options),
        'name': name,
        'dep': [url],
        'simplecmd': cmd
    }
    make.makeTargets.append(makeTarget)
    return name
    
def input_file(url):
    """Resolve an input specification to the name of the file to use.

    *url* has the form "location|key=value|key=value...".  The pipe can be
    escaped with a backslash.  Options given more than once are collected
    into one value, separated by a space.

    A location starting with http://, https:// or ftp:// is passed to
    download().  If the resulting name has one of the extensions listed in
    archive_formats, it is passed on to extract() together with the
    options.  The final name is returned.
    """
    pieces = list(split_escape(url, "|"))
    location = pieces[0].lstrip()

    # Everything after the first pipe is an option of the form key=value.
    options = {}
    for piece in pieces[1:]:
        key, _, value = piece.partition("=")
        if key in options:
            options[key] = options[key] + " " + value
        else:
            options[key] = value

    # need to download?
    remote_schemes = ("http://", "https://", "ftp://")
    name = download(location) if location.startswith(remote_schemes) else location

    # Extract files if it is an archive.
    extension = os.path.splitext(name)[1][1:]
    if extension in archive_formats:
        name = extract(name, options)

    return name