Commit 9f157520 authored by Alessio Parma

wip - pygmentize

parent a0453300
......@@ -5,6 +5,6 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="ScottPlot" Version="4.0.30" />
<PackageReference Include="ScottPlot" Version="4.0.31" />
</ItemGroup>
</Project>
\ No newline at end of file
......@@ -18,6 +18,6 @@
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="2.0.0" />
<PackageReference Include="Microsoft.Extensions.Options" Version="2.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
<PackageReference Include="PommaLabs.MimeTypes" Version="1.3.2" />
<PackageReference Include="PommaLabs.MimeTypes" Version="1.4.0" />
</ItemGroup>
</Project>
\ No newline at end of file
......@@ -51,6 +51,12 @@ public TempFileDTO(string contentType)
/// </summary>
public string Extension { get; }
/// <summary>
/// Tells whether this file should be handled as binary content, that is, whether the
/// encoding reported by the MIME type map for <see cref="ContentType"/> is Base64.
/// </summary>
/// <returns>True if this file encoding is binary, false otherwise.</returns>
public bool IsBinary
{
    get
    {
        var encoding = MimeTypeMap.GetEncoding(ContentType);
        return encoding == MimeEncoding.Base64;
    }
}
/// <summary>
/// Checks whether this file represents an HTML document.
/// </summary>
......@@ -59,7 +65,7 @@ public TempFileDTO(string contentType)
{
MimeTypeMap.Extensions.HTML => true,
MimeTypeMap.Extensions.XHTML => true,
_ => false,
_ => false
};
/// <summary>
......@@ -88,7 +94,17 @@ public TempFileDTO(string contentType)
MimeTypeMap.Extensions.ODT => true,
// Other formats:
MimeTypeMap.Extensions.RTF => true,
_ => false,
_ => false
};
/// <summary>
/// Tells whether this file is a text document: a file which is neither binary nor an HTML
/// document and whose extension is not explicitly excluded (currently only SVG is).
/// </summary>
/// <returns>True if this file represents a text document, false otherwise.</returns>
public bool IsTextDocument
{
    get
    {
        // Binary files and HTML documents are never treated as plain text documents.
        if (IsBinary || IsHtmlDocument)
        {
            return false;
        }

        // Textual formats which must not be treated as text documents are listed here.
        switch (Extension)
        {
            case MimeTypeMap.Extensions.SVG:
                return false;

            default:
                return true;
        }
    }
}
/// <summary>
......@@ -102,7 +118,7 @@ public TempFileDTO(string contentType)
MimeTypeMap.Extensions.FLV => true,
MimeTypeMap.Extensions.MKV => true,
MimeTypeMap.Extensions.MP4 => true,
_ => false,
_ => false
};
/// <summary>
......
......@@ -16,7 +16,7 @@
<PackageReference Include="Lamar.Microsoft.DependencyInjection" Version="4.3.0" />
<PackageReference Include="linq2db" Version="2.9.8" />
<PackageReference Include="Microsoft.ApplicationInsights.AspNetCore" Version="2.14.0" />
<PackageReference Include="Microsoft.AspNetCore.Mvc.Razor.RuntimeCompilation" Version="3.1.3" />
<PackageReference Include="Microsoft.AspNetCore.Mvc.Razor.RuntimeCompilation" Version="3.1.4" />
<PackageReference Include="Microsoft.AspNetCore.Mvc.Versioning.ApiExplorer" Version="4.1.1" />
<PackageReference Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.10.8" PrivateAssets="All" />
<PackageReference Include="Microsoft.VisualStudio.Web.CodeGeneration.Design" Version="3.1.2" PrivateAssets="All" />
......@@ -26,8 +26,8 @@
<PackageReference Include="PommaLabs.KVLite.PostgreSql" Version="10.0.0" />
<PackageReference Include="PommaLabs.KVLite.SQLite" Version="10.0.0" />
<PackageReference Include="PommaLabs.KVLite.SqlServer" Version="10.0.0" />
<PackageReference Include="reCAPTCHA.AspNetCore" Version="3.0.2" />
<PackageReference Include="SecurityCodeScan" Version="3.5.0" PrivateAssets="All" />
<PackageReference Include="reCAPTCHA.AspNetCore" Version="3.0.3" />
<PackageReference Include="SecurityCodeScan" Version="3.5.2" PrivateAssets="All" />
<PackageReference Include="Serilog.AspNetCore" Version="3.2.0" />
<PackageReference Include="Serilog.Filters.Expressions" Version="2.1.0" />
<PackageReference Include="Serilog.Sinks.ApplicationInsights" Version="3.1.0" />
......
......@@ -39,6 +39,8 @@ namespace PommaLabs.Thumbnailer.Services.Managers.Thumbnail
/// </summary>
public sealed class ConcreteThumbnailManager : IThumbnailManager
{
/// <summary>
/// Command line flags shared by the wkhtmltoimage and wkhtmltopdf invocations: they are
/// placed at the beginning of the arguments of both commands.
/// </summary>
private const string WkhtmlCommonFlags = "--quiet --disable-local-file-access --load-media-error-handling ignore --stop-slow-scripts";
private readonly ICommandManager _commandManager;
private readonly ILogger<ConcreteThumbnailManager> _logger;
private readonly ITempFileStore _tempFileStore;
......@@ -68,9 +70,17 @@ public sealed class ConcreteThumbnailManager : IThumbnailManager
if (file.IsHtmlDocument)
{
await ConvertHtmlToImageAsync(file, thumbnail, cancellationToken).ConfigureAwait(false);
await ConvertHtmlToImageAsync(file, thumbnail, widthPx, cancellationToken).ConfigureAwait(false);
file = thumbnail;
}
else if (file.IsTextDocument)
{
var html = await _tempFileStore.GetTempFileAsync(MimeTypeMap.TEXT.HTML, cancellationToken).ConfigureAwait(false);
await ConvertTextToHtmlAsync(file, html, cancellationToken).ConfigureAwait(false);
var pdf = await _tempFileStore.GetTempFileAsync(MimeTypeMap.APPLICATION.PDF, cancellationToken).ConfigureAwait(false);
await ConvertHtmlToPdfAsync(html, pdf, cancellationToken).ConfigureAwait(false);
file = pdf;
}
else if (file.IsOfficeDocument)
{
var pdf = await _tempFileStore.GetTempFileAsync(MimeTypeMap.APPLICATION.PDF, cancellationToken).ConfigureAwait(false);
......@@ -112,15 +122,16 @@ public sealed class ConcreteThumbnailManager : IThumbnailManager
return thumbnail;
}
private async Task ConvertHtmlToImageAsync(TempFileDTO file, TempFileDTO thumbnail, CancellationToken cancellationToken)
private async Task ConvertHtmlToImageAsync(TempFileDTO file, TempFileDTO image, int widthPx, CancellationToken cancellationToken)
{
try
{
const int ZoomFactor = 4;
await _commandManager.RunCommandAsync(
"wkhtmltoimage", $"-q --width 1024 --disable-smart-width --disable-local-file-access --load-media-error-handling ignore {file.Path} {thumbnail.Path}",
"wkhtmltoimage", $"{WkhtmlCommonFlags} --quality 100 --zoom {ZoomFactor} --width {widthPx * ZoomFactor} {file.Path} {image.Path}",
default, cancellationToken).ConfigureAwait(false);
}
catch (CommandException ex) when (ex.ExitCode == 1 && new FileInfo(thumbnail.Path).Length > 0)
catch (CommandException ex) when (ex.ExitCode == 1 && new FileInfo(image.Path).Length > 0)
{
// wkhtmltoimage seems to exit with code 1 when a linked resource (like an image or
// a script) does not exist. However, it generates the image as correct as
......@@ -129,6 +140,23 @@ private async Task ConvertHtmlToImageAsync(TempFileDTO file, TempFileDTO thumbna
}
}
/// <summary>
/// Converts an HTML file into a PDF document by running the wkhtmltopdf command.
/// </summary>
/// <param name="file">The HTML file to be converted.</param>
/// <param name="pdf">The temporary file which receives the generated PDF.</param>
/// <param name="cancellationToken">Cancellation token, forwarded to the command manager.</param>
/// <returns>A task which completes when the command has been executed.</returns>
private async Task ConvertHtmlToPdfAsync(TempFileDTO file, TempFileDTO pdf, CancellationToken cancellationToken)
{
    try
    {
        await _commandManager.RunCommandAsync(
            "wkhtmltopdf", $"{WkhtmlCommonFlags} --zoom 2 --dpi 96 {file.Path} {pdf.Path}",
            default, cancellationToken).ConfigureAwait(false);
    }
    catch (CommandException ex) when (ex.ExitCode == 1 && new FileInfo(pdf.Path).Length > 0)
    {
        // wkhtmltopdf seems to exit with code 1 when a linked resource (like an image or a
        // script) does not exist. However, it generates the PDF as correct as possible:
        // therefore, when the output file is not empty, it is better to ignore this kind
        // of error and only log a warning.
        _logger.LogWarning(ex, "An ignorable error occurred while executing wkhtmltopdf command");
    }
}
private async Task ConvertOfficeToPdfAsync(TempFileDTO file, TempFileDTO pdf, CancellationToken cancellationToken)
{
// See: https://ask.libreoffice.org/en/question/7284/multiple-instances-for-c-automation/
......@@ -148,10 +176,17 @@ private async Task ConvertOfficeToPdfAsync(TempFileDTO file, TempFileDTO pdf, Ca
}
}
private async Task ConvertVideoToImageAsync(TempFileDTO file, TempFileDTO thumbnail, CancellationToken cancellationToken)
/// <summary>
/// Renders a text file as an HTML document by running the pygmentize command.
/// </summary>
/// <param name="file">The text file to be rendered.</param>
/// <param name="html">The temporary file which receives the generated HTML.</param>
/// <param name="cancellationToken">Cancellation token, forwarded to the command manager.</param>
/// <returns>A task which completes when the command has been executed.</returns>
private async Task ConvertTextToHtmlAsync(TempFileDTO file, TempFileDTO html, CancellationToken cancellationToken)
{
    // Output goes to the HTML temp file; the "full=true" option is passed to pygmentize via -O.
    var arguments = $"-o {html.Path} -O \"full=true\" {file.Path}";

    var conversion = _commandManager.RunCommandAsync("pygmentize", arguments, default, cancellationToken);
    await conversion.ConfigureAwait(false);
}
/// <summary>
/// Extracts an image from a video file by running the ffmpeg command, asking for a single
/// frame produced by the "thumbnail" video filter.
/// </summary>
/// <param name="file">The video file from which the image is extracted.</param>
/// <param name="image">The temporary file which receives the extracted image.</param>
/// <param name="cancellationToken">Cancellation token, forwarded to the command manager.</param>
/// <returns>A task which completes when the command has been executed.</returns>
private async Task ConvertVideoToImageAsync(TempFileDTO file, TempFileDTO image, CancellationToken cancellationToken)
{
await _commandManager.RunCommandAsync(
// NOTE(review): the two argument lines below look like the old and the new version of the
// same line (the first one still refers to "thumbnail") - confirm which one is current.
"ffmpeg", $"-y -v panic -i {file.Path} -vf thumbnail -frames:v 1 {thumbnail.Path}",
"ffmpeg", $"-y -v panic -i {file.Path} -vf thumbnail -frames:v 1 {image.Path}",
default, cancellationToken).ConfigureAwait(false);
}
}
......
......@@ -10,8 +10,8 @@
<ItemGroup>
<PackageReference Include="AsyncEnumerator" Version="4.0.2" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="3.1.3" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="3.1.3" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="3.1.4" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="3.1.4" />
<PackageReference Include="System.CommandLine.DragonFruit" Version="0.3.0-alpha.20214.1" />
</ItemGroup>
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Pygments
~~~~~~~~
Pygments is a syntax highlighting package written in Python.
It is a generic syntax highlighter suitable for use in code hosting, forums,
wikis or other applications that need to prettify source code. Highlights
are:
* a wide range of over 500 languages and other text formats is supported
* special attention is paid to details, increasing quality by a fair amount
* support for new languages and formats are added easily
* a number of output formats, presently HTML, LaTeX, RTF, SVG, all image \
formats that PIL supports and ANSI sequences
* it is usable as a command-line tool and as a library
:copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
# The module docstring above is passed to setup() as long_description, so its text is
# part of the package metadata and must not be edited casually.
from setuptools import setup, find_packages
# The version is read from the pygments package itself, so the package must be importable
# when this script runs.
from pygments import __version__
# Package metadata and build configuration for the Pygments distribution.
setup(
name = 'Pygments',
version = __version__,
url = 'https://pygments.org/',
license = 'BSD License',
author = 'Georg Brandl',
# NOTE(review): this value looks like an e-mail obfuscation placeholder rather than a real
# address - presumably mangled when this file was extracted; restore it from upstream.
author_email = '[email protected]',
description = 'Pygments is a syntax highlighting package written in Python.',
long_description = __doc__,
keywords = 'syntax highlighting',
# Only the pygments package and its subpackages are included.
packages = find_packages(include=['pygments', 'pygments.*']),
# Exposes the `pygmentize` console command, bound to pygments.cmdline:main.
entry_points = {
'console_scripts': ['pygmentize = pygments.cmdline:main'],
},
platforms = 'any',
zip_safe = False,
include_package_data = True,
python_requires='>=3.5',
classifiers = [
'License :: OSI Approved :: BSD License',
'Intended Audience :: Developers',
'Intended Audience :: End Users/Desktop',
'Intended Audience :: System Administrators',
'Development Status :: 6 - Mature',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
'Operating System :: OS Independent',
'Topic :: Text Processing :: Filters',
'Topic :: Utilities',
],
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment