From c44c3864d8ed64bcb48204546bd4e1f31b3f25c9 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Tue, 4 Jan 2022 10:16:32 -0800 Subject: [PATCH 1/3] Render to jpeg and keep that if smaller than png On a real-world exam, rendered from larger scanner-produced poor-quality jpegs-in-pdf, this gives a filesize reduction of about 43MiB -> 15MiB. Some of that might be wasted effort on jpeg artifacts so the savings are perhaps less significant with better scans. But the code takes whichever is better so it shouldn't hurt (except it takes longer). --- plom/scan/scansToImages.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/plom/scan/scansToImages.py b/plom/scan/scansToImages.py index 46a42d3b5..3b4bdb7ee 100644 --- a/plom/scan/scansToImages.py +++ b/plom/scan/scansToImages.py @@ -206,10 +206,21 @@ def processFileToBitmaps(file_name, dest, *, do_not_extract=False, debug_jpeg=Fa files.append(outname) continue - # TODO: experiment with jpg: generate both and see which is smaller? - outname = dest / (basename + ".png") - pix.save(outname) - files.append(outname) + pngname = dest / (basename + ".png") + jpgname = dest / (basename + ".jpg") + # TODO: pil_save 10% smaller but 2x-3x slower, Issue #1866 + pix.save(pngname) + # pix.pil_save(pngname, optimize=True) + # TODO: add progressive=True? + pix.pil_save(jpgname, quality=90, optimize=True, subsampling=0) + # Keep the jpeg if its at least a little smaller + if jpgname.stat().st_size < 0.9 * pngname.stat().st_size: + pngname.unlink() + files.append(jpgname) + else: + jpgname.unlink() + files.append(pngname) + # WebP here is also an option, Issue #1864. assert len(files) == len(doc), "Expected one image per page" doc.close() return files -- GitLab From 87b8540c5670743d0e06379ebfea4d15ed7e16f7 Mon Sep 17 00:00:00 2001 From: "Colin B. 
Macdonald" Date: Tue, 4 Jan 2022 10:22:06 -0800 Subject: [PATCH 2/3] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0391527bd..59c864134 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed * Potential memory leaks in Annotator, Manager, and dialogs. * JPEG support is no longer restricted to sizes in multiples of 16, better rotation support. +* Improved disc-space usage due to JPEG use in more places. * Removing or adding pages is more selective about which annotations are invalidated. * A large number of modal dialog fixes. * Other misc changes. -- GitLab From cd92c7fae039637be20cd856738af769e9556148 Mon Sep 17 00:00:00 2001 From: "Colin B. Macdonald" Date: Tue, 4 Jan 2022 10:36:18 -0800 Subject: [PATCH 3/3] Comment what subsampling=0 is for --- plom/scan/scansToImages.py | 1 + 1 file changed, 1 insertion(+) diff --git a/plom/scan/scansToImages.py b/plom/scan/scansToImages.py index 3b4bdb7ee..0793fcdb0 100644 --- a/plom/scan/scansToImages.py +++ b/plom/scan/scansToImages.py @@ -212,6 +212,7 @@ def processFileToBitmaps(file_name, dest, *, do_not_extract=False, debug_jpeg=Fa pix.save(pngname) # pix.pil_save(pngname, optimize=True) # TODO: add progressive=True? + # Note subsampling off to avoid mucking with red hairlines pix.pil_save(jpgname, quality=90, optimize=True, subsampling=0) # Keep the jpeg if its at least a little smaller if jpgname.stat().st_size < 0.9 * pngname.stat().st_size: -- GitLab