pull/293/merge
Robert Felten 10 months ago committed by GitHub
commit f426c9c0be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -83,9 +83,23 @@ def getVersion():
def truncateFilename(other={}, filename=''):
    """ Truncate filename if longer than other['filenamelimit'].

    All lengths are measured in UTF-8 encoded bytes, so names containing
    multi-byte characters are handled correctly.

    other: dict providing 'filenamelimit' (int, maximum length in bytes)
        and 'imagesdescext' (suffix that is appended to the returned name
        for the description file; room is reserved for it).
    filename: the (unquoted) image filename.

    Returns the filename as text. A name whose encoded length is below the
    limit is returned unchanged. Otherwise the result is
    <prefix><md5 hexdigest of the full name><.extension>, where the prefix
    is cut so that the result plus other['imagesdescext'] fits in the limit.

    Raises AssertionError if the limit leaves no room for any prefix.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3
    filename = text_type(filename)
    encoded = filename.encode('utf-8')
    limit = other['filenamelimit']
    if len(encoded) < limit:
        return filename
    parts = filename.split('.')
    fileext = '' if len(parts) == 1 else '.' + parts[-1]
    digest = md5(encoded).hexdigest()
    # make room for md5, file extension and imagesdescext (all in bytes)
    budget = (limit - len(digest) - len(fileext.encode('utf-8')) -
              len(other['imagesdescext'].encode('utf-8')))
    assert budget > 0
    # Cut on the byte level; 'ignore' drops a multi-byte character that the
    # cut split in half, so the prefix never exceeds the byte budget.
    prefix = encoded[:budget].decode('utf-8', 'ignore')
    trunked_fn = prefix + digest + fileext
    print('Filename is too long, truncating. Now it is: %s' % trunked_fn)
    return trunked_fn
def delay(config={}, session=None):
@ -1483,13 +1497,8 @@ def generateImageDump(config={}, other={}, images=[], start='', session=None):
delay(config=config, session=session)
# saving file
# truncate filename if length > 100 (100 + 32 (md5) = 132 < 143 (crash
# limit). Later .desc is added to filename, so better 100 as max)
filename2 = urllib.unquote(filename)
if len(filename2) > other['filenamelimit']:
# split last . (extension) and then merge
filename2 = truncateFilename(other=other, filename=filename2)
print 'Filename is too long, truncating. Now it is:', filename2
filename2 = truncateFilename(other=other, filename=filename2)
filename3 = u'%s/%s' % (imagepath, filename2)
imagefile = open(filename3, 'wb')
@ -1535,7 +1544,7 @@ def generateImageDump(config={}, other={}, images=[], start='', session=None):
text=u'The page "%s" was missing in the wiki (probably deleted)' % title
)
f = open('%s/%s.desc' % (imagepath, filename2), 'w')
f = open('%s/%s%s' % (imagepath, filename2, other['imagesdescext']), 'w')
# <text xml:space="preserve" bytes="36">Banner featuring SG1, SGA, SGU teams</text>
if not re.search(r'</page>', xmlfiledesc):
# failure when retrieving desc? then save it as empty .desc
@ -1932,9 +1941,10 @@ def getParameters(params=[]):
other = {
'resume': args.resume,
'filenamelimit': 100, # do not change
'filenamelimit': 140, # encryptfs reduce the filename limit from 255 to ~148 chars :/
'force': args.force,
'session': session
'session': session,
'imagesdescext': '.desc'
}
# calculating path, if not defined by user with --path=

@ -0,0 +1,82 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This file is part of the wikiteam project.
#
# Copyright (C) 2017 Robert Felten - https://github.com/rfelten/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # q&d import hack, sorry
import unittest
from dumpgenerator import truncateFilename
# This file is intended to test offline functionality of the dumpgenerator.py.
# For all other tests see test_dumpgenerator.py
class TestDumpgeneratorOffline(unittest.TestCase):
    """ Offline tests for truncateFilename() from dumpgenerator. """

    def setUp(self):
        other = dict()  # FIXME: get from dumpgenerator, but code base is a pre-OO mess
        other['filenamelimit'] = 140  # eCryptfs reduces the filename limit from 255 to ~148 chars :/
        other['imagesdescext'] = '.desc'
        self.other = other

    def tearDown(self):
        pass

    def helper_truncateFilename(self, fn):
        """ Truncate fn and assert the result does not exceed the limit. """
        fn_trunc = truncateFilename(other=self.other, filename=fn)
        # NOTE(review): this compares the character count, while
        # truncateFilename() compares the UTF-8 byte length against the
        # limit - consider asserting on len(fn_trunc.encode('utf-8')) too.
        self.assertLessEqual(len(fn_trunc), self.other['filenamelimit'],
                             "trunced filename '%s' len of %d exceed limit of %d." % (
                                 fn_trunc, len(fn_trunc), self.other['filenamelimit']))

    def test_truncateFilename1(self):
        """ Test if truncateFilename() obeys other['filenamelimit'] - real world example 1"""
        fn = u"Assortiment de différentes préparation à bases de légumes et féculents, bien sur servit avec de l'injara.JPG"
        self.assertEqual(len(fn), 108)
        # chars like 'è' take more than one byte - this is maybe unexpected
        self.assertEqual(len(fn.encode("utf-8")), 113)
        self.helper_truncateFilename(fn)

    def test_truncateFilename2(self):
        """ Test if truncateFilename() obeys other['filenamelimit'] - longest valid name w/o file extension"""
        fn = "A" * self.other['filenamelimit']
        self.helper_truncateFilename(fn)

    def test_truncateFilename3(self):
        """ Test if truncateFilename() obeys other['filenamelimit'] - longest valid name w/ file extension"""
        fn = "A" * self.other['filenamelimit']
        fn = fn[:-4] + ".jpg"
        self.helper_truncateFilename(fn)

    def test_truncateFilename4(self):
        """ Test if truncateFilename() obeys other['filenamelimit'] - valid name w/ file extension"""
        # floor division: "/" yields a float on Python 3 and str * float fails
        fn = "A" * (self.other['filenamelimit'] // 2)
        fn = fn[:-4] + ".jpg"
        self.helper_truncateFilename(fn)

    def test_truncateFilename5(self):
        """ Test if truncateFilename() obeys other['filenamelimit'] - longest valid name w/ file extension (unicode)"""
        fn = u"è" * self.other['filenamelimit']
        fn = fn[:-4] + ".jpg"
        self.helper_truncateFilename(fn)
# Run the offline test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Loading…
Cancel
Save