Zippy for managing container type files in repo.
This commit is contained in:
parent
cfdf09a93e
commit
b4237ffbae
198
util/zippey.py
Normal file
198
util/zippey.py
Normal file
@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (c) 2014, Sippey Fun Lab
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the name of the Sippey Fun Lab nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
|
||||
|
||||
#
|
||||
# Zippey: A Git filter for friendly handling of ZIP-based files
|
||||
#
|
||||
# There are many types of ZIP-based files, such as Microsoft Office .docx,
|
||||
# .xlsx, .pptx files, OpenOffice .odt files and jar files, that contains
|
||||
# plaintext content but not really tractable by git due to compression smears
|
||||
# parts that have been modified and parts that remain the same across commit.
|
||||
# This prevent Git from versioning these files and treat them as a new binary
|
||||
# blob every time the file is saved.
|
||||
#
|
||||
# Zippey is a Git filter that un-zip zip-based file into a simple text format
|
||||
# during git add/commit ("clean" process) and recover the original zip-based
|
||||
# file after git checkout ("smudge" process). Since diff is taken on the
|
||||
# "cleaned" file after file is added, it is likely real changes to file can be
|
||||
# reflected by original git diff command.
|
||||
#
|
||||
# The text format is defined as a series of records. Each records represent a
|
||||
# file in the original zip-based file, which is composed of two parts,
|
||||
# a header that contains meta file and a body that contains data. The header
|
||||
# is a few data fields segmented by pipe character like this:
|
||||
#
|
||||
# length|raw_length|type|filename
|
||||
#
|
||||
# where length is an ascii coded integer of the following data section, raw_length
|
||||
# is the orginal length of data (if transformation is taken), type can be A for
|
||||
# text data or B for binary data, and filename is the original file name
|
||||
# including path if the zip-based file contains directories. Immediately after
|
||||
# the header, there is a carriage return ('\n'), follows "length" byte of
|
||||
# data, and then another CR and then the next recor, i,e,
|
||||
#
|
||||
# [header1]\n[data1]\n[header2]\n[data2] ...
|
||||
#
|
||||
# There are two types of data section. If the file contains only text data,
|
||||
# its content is copied to data section without any change, otherwise, data
|
||||
# is base64 coded to ensure the entire file is text format.
|
||||
#
|
||||
#
|
||||
# Author: Sippey (sippey@gmail.com)
|
||||
# Date: Apr.18, 2014
|
||||
#
|
||||
# Modified by Kristian Hoey Horsberg <khh1990 ' at ' gmail.com>
|
||||
# to make python 3 compatible
|
||||
# Date May 20th 2014
|
||||
#
|
||||
|
||||
import zipfile
|
||||
import sys
|
||||
import io
|
||||
import base64
|
||||
import string
|
||||
import tempfile
|
||||
import os.path
|
||||
|
||||
DEBUG_ZIPPEY = False
|
||||
NAME = 'Zippey'
|
||||
ENCODING = 'UTF-8'
|
||||
|
||||
|
||||
def debug(msg):
|
||||
'''Print debug message'''
|
||||
if DEBUG_ZIPPEY:
|
||||
sys.stderr.write('{0}: debug: {1}\n'.format(NAME, msg))
|
||||
|
||||
def error(msg):
|
||||
'''Print error message'''
|
||||
sys.stderr.write('{0}: error: {1}\n'.format(NAME, msg))
|
||||
|
||||
def init():
|
||||
'''Initialize writing; set binary mode for windows'''
|
||||
debug("Running on {}".format(sys.platform))
|
||||
if sys.platform.startswith('win'):
|
||||
import msvcrt
|
||||
debug("Enable Windows binary workaround")
|
||||
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
|
||||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||
|
||||
def encode(input, output):
|
||||
'''Encode into special VCS friendly format from input to output'''
|
||||
debug("ENCODE was called")
|
||||
tfp = tempfile.TemporaryFile(mode='w+b')
|
||||
tfp.write(input.read())
|
||||
zfp = zipfile.ZipFile(tfp, "r")
|
||||
for name in zfp.namelist():
|
||||
data = zfp.read(name)
|
||||
text_extentions = ['.txt', '.html', '.xml']
|
||||
extention = os.path.splitext(name)[1][1:].strip().lower()
|
||||
try:
|
||||
# Check if text data
|
||||
data.decode(ENCODING)
|
||||
try:
|
||||
strdata = map(chr, data)
|
||||
except TypeError:
|
||||
strdata = data
|
||||
if extention not in text_extentions and not all(c in string.printable for c in strdata):
|
||||
raise UnicodeDecodeError(ENCODING, "".encode(ENCODING), 0, 1, "Artificial exception")
|
||||
|
||||
# Encode
|
||||
debug("Appending text file '{}'".format(name))
|
||||
output.write("{}|{}|A|{}\n".format(len(data), len(data), name).encode(ENCODING))
|
||||
output.write(data)
|
||||
output.write("\n".encode(ENCODING)) # Separation from next meta line
|
||||
except UnicodeDecodeError:
|
||||
# Binary data
|
||||
debug("Appending binary file '{}'".format(name))
|
||||
raw_len = len(data)
|
||||
data = base64.b64encode(data)
|
||||
output.write("{}|{}|B|{}\n".format(len(data), raw_len, name).encode(ENCODING))
|
||||
output.write(data)
|
||||
output.write("\n".encode(ENCODING)) # Separation from next meta line
|
||||
zfp.close()
|
||||
tfp.close()
|
||||
|
||||
def decode(input, output):
|
||||
'''Decode from special VCS friendly format from input to output'''
|
||||
debug("DECODE was called")
|
||||
tfp = tempfile.TemporaryFile(mode='w+b')
|
||||
zfp = zipfile.ZipFile(tfp, "w", zipfile.ZIP_DEFLATED)
|
||||
|
||||
while True:
|
||||
meta = input.readline().decode(ENCODING)
|
||||
if not meta:
|
||||
break
|
||||
|
||||
(data_len, raw_len, mode, name) = [t(s) for (t, s) in zip((int, int, str, str), meta.split('|'))]
|
||||
if mode == 'A':
|
||||
debug("Appending text file '{}'".format(name))
|
||||
zfp.writestr(name.rstrip(), input.read(data_len))
|
||||
input.read(1) # Skip last '\n'
|
||||
elif mode == 'B':
|
||||
debug("Appending binary file '{}'".format(name.rstrip()))
|
||||
zfp.writestr(name.rstrip(), base64.b64decode(input.read(data_len)))
|
||||
input.read(1) # Skip last '\n'
|
||||
else:
|
||||
# Should never reach here
|
||||
zfp.close()
|
||||
tfp.close()
|
||||
error('Illegal mode "{}"'.format(mode))
|
||||
sys.exit(1)
|
||||
|
||||
# Flush all writes
|
||||
zfp.close()
|
||||
|
||||
# Write output
|
||||
tfp.seek(0)
|
||||
output.write(tfp.read())
|
||||
tfp.close()
|
||||
|
||||
def main():
|
||||
'''Main program'''
|
||||
init()
|
||||
|
||||
input = io.open(sys.stdin.fileno(), 'rb')
|
||||
output = io.open(sys.stdout.fileno(), 'wb')
|
||||
|
||||
if len(sys.argv) < 2 or sys.argv[1] == '-' or sys.argv[1] == '--help':
|
||||
sys.stdout.write("{}\nTo encode: 'python zippey.py e'\nTo decode: 'python zippey.py d'\nAll files read from stdin and printed to stdout\n".format(NAME))
|
||||
elif sys.argv[1] == 'e':
|
||||
encode(input, output)
|
||||
elif sys.argv[1] == 'd':
|
||||
decode(input, output)
|
||||
else:
|
||||
error("Illegal argument '{}'. Try --help for more information".format(sys.argv[1]))
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Reference in New Issue
Block a user