Wharfie

a modern, docker-like, embedded image builder

Brought to you by: ingo
MIT License
Diff of /trunk/wharfie/lib/files.py [000000] .. [r16] Maximize Restore

Switch to side-by-side view

--- a
+++ b/trunk/wharfie/lib/files.py
@@ -0,0 +1,258 @@
+#!/usr/bin/python
+#
+# Copyright 2017 Ingo Hornberger <ingo_@gmx.net>
+#
+# This software is licensed under the MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies
+# or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+# OR OTHER DEALINGS IN THE SOFTWARE.
+#
+################################################################################
+#
+# We want to allow the same features in almost every action, such as download
+# links for input files in FROM, ADD, COPY, ...
+# If an archive is found, which contains only one file, it will be recursively
+# extracted.
+#
+# Or support several archive formats, like *.img, *.a, *.tar, *.tar.gz
+#
+# This library is implementing those generic access functions.
+#
+# Note that we are trying to be as compatible as necessary (not as possible ;) )
+# with the Dockerfile format. But we will try to avoid the mistakes Docker made.
+#
+# So this is a list of planned deviations from Dockerfile:
+#
+# Issue 1)
+# - ADD supports URLs and archives. But docker doesn't support these two in
+#   combination. We will do.
+#
+# - ADD is able to extract an archive. But you can't just extract a part of
+#   it.
+#
+# => These two limitations are leading people to not use ADD, but use curl + tar
+#    + s.th. else inside of the docker container instead. Which is bad
+#    practice.
+#
+# Solution 1)
+#
+# - Archive extract and URLs can be combined
+# - Every URL can have "options", which are separated by a pipe. These options
+#   contain the files, partitions, etc. which the user wants to extract.
+#   e.g.:
+#   ADD http://mypage.com/files/myrootfs.tar|tar=./etc/fstab|tar=./etc/passwd /
+#
+# It's keen to want everything to use the same infrastructure and therefore
+# support the same features. But anyway we have to maintain the following
+# differences between API functions...
+#
+# Issue 2)
+# - FROM extracts everything, except tar* archives
+# - ADD extracts everything, except tar* archives (as tar archives are extracted
+#   while handling the output, not while handling the input)
+# - COPY extracts nothing
+# - Anyway, all of them support downloads
+#
+# Solution 2)
+# - we add a parameter to input_file, which specifies an optional parameter to
+#   enable extracting of everything supported, except tar archives.
+# Note that we can only support extraction of archives that contain only one
+# file (at least after applying their options). Otherwise we don't get a simple
+# data flow.
+#
+# Issue 3)
+# - When extracting over a few indirections, we can't predict which format will
+#   come out in the end. So it is nearly impossible to define a good name for
+#   the build target. But, as the name of the build target also has to contain
+#   the exact file format, we will have to specify the output format in such
+#   cases.
+#
+# Solution 3)
+# - The output format is one of the options, which has to be passed to the
+#   command when we want to trigger the whole extraction chain.
+#   e.g.:
+#   FROM https://downloads.raspberrypi.org/raspbian_lite_latest|img=p1|format=tar
+#
+#   In this example, the exact chain is:
+#   raspbian_lite_latest -> *.zip -> *.img -> *.tar
+#
+#   The image rule has the effect, that partition 1 is packed into a tar archive.
+#   This is always the rule, to "extract" an image. If you want, you can then
+#   continue extracting this to other formats.
+#
+################################################################################
+import os
+import urllib
+import binascii
+from lib import makefile as make
+
+# File extensions for which input_file() calls extract(). "tar" is not listed,
+# matching the header note that tar archives are not extracted on input.
+archive_formats = ["zip", "a", "img"]
+
+def split_escape(string, delimiter):
+    """Lazily split ``string`` at every unescaped ``delimiter``.
+
+    A backslash protects the character that follows it, so an escaped
+    delimiter does not start a new field.  Backslashes are left in the
+    yielded fields; nothing is unescaped.  If the string ends in a lone
+    backslash, the last field is yielded without that backslash.
+
+    Raises ValueError (on first iteration, as this is a generator) when
+    ``delimiter`` is not exactly one character long.
+    """
+    if len(delimiter) != 1:
+        raise ValueError('Invalid delimiter: ' + delimiter)
+    total = len(string)
+    start = 0
+    pos = 0
+    while pos < total:
+        char = string[pos]
+        if char == '\\':
+            if pos == total - 1:
+                # Dangling escape at the very end: emit what we have and stop.
+                yield string[start:pos]
+                return
+            # Skip the backslash and the character it protects.
+            pos += 2
+            continue
+        if char == delimiter:
+            yield string[start:pos]
+            start = pos + 1
+        pos += 1
+    yield string[start:pos]
+
+def execute(command):
+    """Run ``command`` through the shell and return what it wrote to stdout.
+
+    ``command`` is a single shell command line (it is started with
+    ``shell=True``).  Only stdout is captured; stderr is not redirected and
+    the exit status is not checked.  The call blocks until the process ends.
+    """
+    # Imported locally: the module-level imports do not provide Popen/PIPE,
+    # so without this the function fails with a NameError.
+    from subprocess import PIPE, Popen
+
+    process = Popen(
+        args=command,
+        stdout=PIPE,
+        shell=True
+    )
+    # communicate() returns (stdout, stderr); only stdout was piped.
+    return process.communicate()[0]
+
+    
+def download(url):
+    """Register a make target that downloads ``url`` and return its name.
+
+    The URL is opened once to follow redirects, so that the file extension
+    is taken from the final location.  The target name is the CRC32 of that
+    URL in upper-case hex followed by the extension.  The file itself is not
+    fetched here; the appended make target carries a wget command for it.
+
+    Returns the name of the make target (the local file name).
+    """
+    # check redirection to get the correct file extension
+    fileh = urllib.urlopen(url)
+    try:
+        redir = fileh.geturl()
+    finally:
+        # Only the resolved URL is needed, so release the connection now.
+        fileh.close()
+    if redir != '':
+        url = redir
+    ext = os.path.splitext(url)[1]
+    name = format(0xFFFFFFFF & binascii.crc32(url), '02X') + ext
+    cmd = "wget --continue --output-document=$@ '%s'" % (url)
+    makeTarget = {
+        'comment' : "download %s" % (url),
+        'name': name,
+        'dep': '',
+        'simplecmd': cmd
+    }
+    make.makeTargets.append(makeTarget)
+    return name
+
+def extract(url, options):
+    """Register a make target that runs the extraction chain on ``url``.
+
+    ``options`` maps option names to their values.  ``format`` is required
+    and names the desired output format; ``zip``, ``a`` and ``img`` are
+    optional and are handed to the matching extraction step (archive
+    members for zip/ar, the partition suffix for images).
+
+    The generated recipe repeatedly looks at the current file extension and
+    unpacks zip, ar and image files until the extension equals the requested
+    format, then moves the result to the target.
+
+    Returns the target name (CRC32 of ``url`` in hex plus "." plus the
+    format).  If ``format`` is missing, an error is printed and ``url`` is
+    returned unchanged without registering a target.
+    """
+    if "format" not in options:
+        print ("error: format not specified for url, which should be extracted")
+        print ("url: %s" % url)
+        return url
+    ext = options["format"]
+    name = format(0xFFFFFFFF & binascii.crc32(url), '02X') + "." + ext
+
+    o_zip = options.get("zip", "")
+    o_a = options.get("a", "")
+    o_img = options.get("img", "")
+    # Notes on the recipe below:
+    # - "$$" is needed wherever the shell (not make) must see a "$".
+    # - The requested format is substituted into the recipe, so the loop
+    #   stops at the same format that is used for the target name.
+    # - zipinfo/unzip expect the archive first and member names afterwards.
+    cmd='''filename="$<"; \\
+format="%s"; \\
+output="$@"; \\
+while true; do \\
+	extension="$${filename##*.}"; \\
+	if [ "x$${extension}" = "x$${format}" ]; then \\
+		echo "info: extraction chain successful, output format found."; \\
+		mv $${filename} $${output}; \\
+		break; \\
+	fi; \\
+	case $${extension} in \\
+		zip) \\
+		    options="%s"; \\
+		    n=$$(zipinfo -1 $${filename} $${options}| wc -l); \\
+		    if [ "$${n}" = "1" ]; then \\
+			f=$$(zipinfo -1 $${filename} $${options}); \\
+			unzip -p $${filename} $${options} > $${f}; \\
+			filename=$${f}; \\
+		    fi; \\
+		    ;; \\
+		a) \\
+		    options="%s"; \\
+		    n=$$(ar t $${filename} $${options}| wc -l); \\
+		    if [ "$${n}" = "1" ]; then \\
+			f=$$(ar t $${filename} $${options}); \\
+			ar x $${filename} $${options}; \\
+			filename=$${f}; \\
+		    fi; \\
+		    ;; \\
+		img) \\
+		    options="%s"; \\
+		    d=$$(basename $$(mktemp -d)); \\
+		    l=$$(sudo kpartx -l $${filename} | sed -n '/loop/ s,.*/dev/\\(loop[0-9]\\+\\).*,\\1, p;q;'); \\
+		    echo sed -n '/loop/ s,.*/dev/\\\\(loop[0-9]\\\\+\\\\).*,\\\\1, pq;'; \\
+		    if [ "x$${d}" != "x" -a "x$${l}" != "x" ]; then \\
+			f="$${filename}.tar"; \\
+			sudo kpartx -as $${filename}; \\
+			sudo mount /dev/mapper/$${l}$${options} /tmp/$${d}; \\
+			(cd /tmp/$${d}; sudo tar -cf - .) > $${f}; \\
+			sudo umount /tmp/$${d}; \\
+			sudo kpartx -d $${filename}; \\
+			rmdir /tmp/$${d}; \\
+			filename=$${f}; \\
+		    else \\
+		        echo "internal error: kpartx returned false or problem with tempdir occured"; \\
+		        break; \\
+		    fi; \\
+		    ;; \\
+		*) \\
+		    echo "error: stopping extracting on $${filename} with ext $${extension}"; \\
+		    break; \\
+		    ;; \\
+	esac; \\
+done''' % (ext, o_zip, o_a, o_img)
+    makeTarget = {
+        'comment' : "extract %s (options: %s)" % (url, options),
+        'name': name,
+        'dep': [url],
+        'simplecmd': cmd
+    }
+    make.makeTargets.append(makeTarget)
+    return name
+    
+def input_file(url):
+    """Turn an input specification into the name of the file to use.
+
+    ``url`` has the form ``location|key=value|key=value...``; pipes escaped
+    with a backslash do not separate fields.  Values given several times for
+    the same key are joined with a single space.  Locations starting with
+    http://, https:// or ftp:// are passed to download(); if the resulting
+    name has an extension listed in archive_formats it is passed, together
+    with the options, to extract().
+
+    Returns the name produced by the last step that applied.
+    """
+    fields = list(split_escape(url, "|"))
+    url = fields[0].lstrip()
+
+    # Collect the options; a repeated key accumulates its values.
+    options = {}
+    for field in fields[1:]:
+        key, _, value = field.partition("=")
+        if key in options:
+            options[key] = options[key] + " " + value
+        else:
+            options[key] = value
+
+    # need to download?
+    if url.startswith(("http://", "https://", "ftp://")):
+        name = download(url)
+    else:
+        name = url
+
+    # Extract files if it is an archive.
+    suffix = os.path.splitext(name)[1]
+    if suffix[1:] in archive_formats:
+        name = extract(name, options)
+
+    return name