Commit 0bc8ba14 authored by Stephan Kreutzer

wakelet_to_pdf_1: Recursive download of wakes.

parent 637db242
/* Copyright (C) 2019 Stephan Kreutzer
*
* This file is part of wakelet_downloader_1.
*
* wakelet_downloader_1 is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License version 3 or any later version,
* as published by the Free Software Foundation.
*
* wakelet_downloader_1 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License 3 for more details.
*
* You should have received a copy of the GNU Affero General Public License 3
* along with wakelet_downloader_1. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @author Stephan Kreutzer
* @since 2019-08-05
*/
import java.io.File;
/**
 * Simple holder that pairs a numeric card ID with the file belonging to that card.
 *
 * Both values are stored exactly as handed to the constructor; no copying or
 * validation takes place.
 */
public class CardSource
{
    /** Numeric ID of the card. */
    protected int id;

    /** File associated with the card. */
    protected File cardFile;

    /**
     * Creates a new pairing of card ID and card file.
     *
     * @param id Numeric ID of the card.
     * @param cardFile File associated with the card; kept by reference.
     */
    public CardSource(int id, File cardFile)
    {
        this.cardFile = cardFile;
        this.id = id;
    }

    /**
     * @return The current value of the card ID field.
     */
    public int GetId()
    {
        return id;
    }

    /**
     * @return The current value of the card file field.
     */
    public File GetCardFile()
    {
        return cardFile;
    }
}
......@@ -13,4 +13,4 @@
# You should have received a copy of the GNU Affero General Public License 3
# along with this program. If not, see <http://www.gnu.org/licenses/>.
java xhtml_to_latex_1 ./output/ ./output_print/
java xhtml_to_latex_1 ./wake/ ./output_print/
......@@ -20,7 +20,7 @@ along with wakelet_downloader_1. If not, see <http://www.gnu.org/licenses/>.
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:template match="/">
<xsl:comment> This file was generated by jsonxml_preparation_stylesheet_cards.xsl of wakelet_downloader_1, which is free software licensed under the GNU Affero General Public License 3 or any later version (see https://gitlab.com/publishing-systems/clients/ and http://www.publishing-systems.org). </xsl:comment><xsl:text>&#xA;</xsl:text>
<xsl:comment> This file was generated by jsonxml_preparation_stylesheet_wake.xsl of wakelet_downloader_1, which is free software licensed under the GNU Affero General Public License 3 or any later version (see https://gitlab.com/publishing-systems/clients/ and http://www.publishing-systems.org). </xsl:comment><xsl:text>&#xA;</xsl:text>
<wakelet>
<wake>
<xsl:apply-templates/>
......
......@@ -13,4 +13,4 @@
# You should have received a copy of the GNU Affero General Public License 3
# along with this program. If not, see <http://www.gnu.org/licenses/>.
java wakelet_downloader_1 "https://wakelet.com/wake/013f9ec7-3e37-4dec-9c45-90ca5f1679be" ./output/
java wakelet_downloader_1 "https://wakelet.com/wake/013f9ec7-3e37-4dec-9c45-90ca5f1679be" .
......@@ -27,10 +27,7 @@ xhtml_to_latex_1: xhtml_to_latex_1.class
CardSource.class: CardSource.java
javac -encoding UTF-8 CardSource.java
wakelet_downloader_1.class: wakelet_downloader_1.java CardSource.class
wakelet_downloader_1.class: wakelet_downloader_1.java
javac -encoding UTF-8 wakelet_downloader_1.java
xhtml_to_latex_1.class: xhtml_to_latex_1.java
......@@ -39,4 +36,3 @@ xhtml_to_latex_1.class: xhtml_to_latex_1.java
clean:
rm -f wakelet_downloader_1.class
rm -f xhtml_to_latex_1.class
rm -f CardSource.class
......@@ -5,7 +5,7 @@
* wakelet_downloader_1 is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License version 3 or any later version,
* as published by the Free Software Foundation.
*
*
* wakelet_downloader_1 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
......@@ -63,16 +63,16 @@ public class wakelet_downloader_1
{
if (args.length < 2)
{
System.out.println("Usage:\n\twakelet_downloader_1 wakelet-wake-url output-directory\n");
System.out.println("Usage:\n\twakelet_downloader_1 wakelet-wake-url target-directory\n");
System.exit(1);
}
String programPath = wakelet_downloader_1.class.getProtectionDomain().getCodeSource().getLocation().getPath();
this.programPath = wakelet_downloader_1.class.getProtectionDomain().getCodeSource().getLocation().getPath();
try
{
programPath = new File(programPath).getCanonicalPath() + File.separator;
programPath = URLDecoder.decode(programPath, "UTF-8");
this.programPath = new File(this.programPath).getCanonicalPath() + File.separator;
this.programPath = URLDecoder.decode(this.programPath, "UTF-8");
}
catch (UnsupportedEncodingException ex)
{
......@@ -85,22 +85,58 @@ public class wakelet_downloader_1
System.exit(1);
}
String cardsUrl = "https://api.wakelet.com/collections/";
return attemptDownload(args[0], args[1], this.programPath);
}
/**
 * Downloads a wake without assigning it a numeric index.
 *
 * Convenience overload that forwards to the four-argument variant with an
 * index of -1, and hands back whatever that call returns.
 *
 * @param wakeUrl URL of the wake; passed through unchanged.
 * @param outputParentPath Parent path for the output directory; passed through unchanged.
 * @param tempParentPath Parent path for the temporary directory; passed through unchanged.
 * @return The result of the four-argument overload.
 */
protected int attemptDownload(String wakeUrl, String outputParentPath, String tempParentPath)
{
    // A negative index makes the four-argument overload pick the un-numbered
    // "temp" and "wake" directory names instead of "temp-N" and "wake-N".
    final int withoutIndex = -1;
    int result = attemptDownload(wakeUrl, outputParentPath, tempParentPath, withoutIndex);
    return result;
}
protected int attemptDownload(String wakeUrl, String outputParentPath, String tempParentPath, int index)
{
String apiUrl = "https://api.wakelet.com/collections/";
{
String[] parts = args[0].split("/");
String[] parts = wakeUrl.split("/");
if (parts.length <= 0)
{
System.out.println("wakelet_downloader_1: Parameter argument \"" + args[0] + "\" for the Wakelet wake URL doesn't contain the wake ID part.");
System.out.println("wakelet_downloader_1: Parameter argument \"" + wakeUrl + "\" for the Wakelet wake URL doesn't contain the wake ID part.");
System.exit(1);
}
cardsUrl += parts[parts.length - 1];
cardsUrl += "/cards";
String id = parts[parts.length - 1];
if (this.globalWakeIds.contains(id) == true)
{
System.out.println("wakelet_downloader_1: Skipping wake with ID '" + id + "', already downloaded.");
return 0;
}
this.globalWakeIds.add(id);
System.out.println("wakelet_downloader_1: Downloading wake with ID '" + id + "'.");
apiUrl += parts[parts.length - 1];
apiUrl += "/cards";
}
if (tempParentPath.endsWith("/") != true &&
tempParentPath.endsWith("\\") != true)
{
tempParentPath += File.separator;
}
File tempDirectory = new File(programPath + "temp");
File tempDirectory = null;
if (index >= 0)
{
tempDirectory = new File(tempParentPath + "temp-" + index);
}
else
{
tempDirectory = new File(tempParentPath + "temp");
}
if (tempDirectory.exists() == true)
{
......@@ -131,7 +167,22 @@ public class wakelet_downloader_1
}
}
File outputDirectory = new File(args[1]);
if (outputParentPath.endsWith("/") != true &&
outputParentPath.endsWith("\\") != true)
{
outputParentPath += File.separator;
}
File outputDirectory = null;
if (index >= 0)
{
outputDirectory = new File(outputParentPath + "wake-" + index);
}
else
{
outputDirectory = new File(outputParentPath + "wake");
}
try
{
......@@ -152,11 +203,8 @@ public class wakelet_downloader_1
{
if (outputDirectory.isDirectory() == true)
{
if (outputDirectory.canWrite() != true)
{
System.out.println("wakelet_downloader_1: Output directory \"" + outputDirectory.getAbsolutePath() + "\" isn't writable.");
System.exit(1);
}
System.out.println("wakelet_downloader_1: Output directory \"" + outputDirectory.getAbsolutePath() + "\" does already exist.");
System.exit(1);
}
else
{
......@@ -177,25 +225,26 @@ public class wakelet_downloader_1
}
}
attemptRetrieval(cardsUrl, "cards", programPath, tempDirectory);
attemptRetrieval(apiUrl, "wake", tempDirectory);
File cardsJsonFile = new File(tempDirectory.getAbsolutePath() + File.separator + "resource_cards");
File cardsJsonXmlFile = new File(tempDirectory.getAbsolutePath() + File.separator + "cards.jsonxml");
File wakeJsonFile = new File(tempDirectory.getAbsolutePath() + File.separator + "resource_wake");
File wakeJsonXmlFile = new File(tempDirectory.getAbsolutePath() + File.separator + "wake.jsonxml");
jsonToXml(cardsJsonFile, programPath, tempDirectory, cardsJsonXmlFile);
jsonToXml(wakeJsonFile, tempDirectory, wakeJsonXmlFile);
File cardsJsonXmlPreparationStylesheet = new File(programPath + "jsonxml_preparation_stylesheet_cards.xsl");
File cardsXmlFile = new File(outputDirectory.getAbsolutePath() + File.separator + "cards.xml");
File wakeJsonXmlPreparationStylesheet = new File(this.programPath + "jsonxml_preparation_stylesheet_wake.xsl");
File wakeXmlFile = new File(outputDirectory.getAbsolutePath() + File.separator + "wake.xml");
xmlTransform(cardsJsonXmlFile, programPath, tempDirectory, cardsJsonXmlPreparationStylesheet, cardsXmlFile);
xmlTransform(wakeJsonXmlFile, tempDirectory, wakeJsonXmlPreparationStylesheet, wakeXmlFile);
List<CardSource> cardSources = new ArrayList<CardSource>();
int subCard = 1;
List<String> subWakeUrls = new ArrayList<String>();
int cardCount = 1;
try
{
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
InputStream in = new FileInputStream(cardsXmlFile);
InputStream in = new FileInputStream(wakeXmlFile);
XMLEventReader eventReader = inputFactory.createXMLEventReader(in);
while (eventReader.hasNext() == true)
......@@ -206,245 +255,160 @@ public class wakelet_downloader_1
{
String tagName = event.asStartElement().getName().getLocalPart();
if (tagName.equals("wake") == true)
if (tagName.equals("description") == true)
{
Attribute attributeUrl = event.asStartElement().getAttributeByName(new QName("url"));
File cardDestinationFile = new File(outputDirectory.getAbsolutePath() + File.separator + "card-" + cardCount + ".xhtml");
if (attributeUrl == null)
if (cardDestinationFile.exists() == true)
{
continue;
}
if (cardDestinationFile.isFile() == true)
{
boolean deleteSuccessful = false;
String cardUrl = attributeUrl.getValue();
try
{
deleteSuccessful = cardDestinationFile.delete();
}
catch (SecurityException ex)
{
{
String[] parts = cardUrl.split("/");
}
/**
* @todo If character isn't found, array contains the original cardUrl string as the first and only element,
* used below in the cardUrl.split("\\?")[0] shortcut.
*/
if (parts.length <= 0)
if (deleteSuccessful != true)
{
if (cardDestinationFile.canWrite() != true)
{
System.out.println("wakelet_downloader_1: Can't overwrite output file \"" + cardDestinationFile.getAbsolutePath() + "\".");
System.exit(1);
}
}
}
else
{
System.out.println("wakelet_downloader_1: Card URL \"" + cardUrl + "\" doesn't contain the wake ID part.");
System.out.println("wakelet_downloader_1: Output path \"" + cardDestinationFile.getAbsolutePath() + "\" isn't a file.");
System.exit(1);
}
cardUrl = parts[parts.length - 1];
cardUrl = cardUrl.split("\\?")[0];
cardUrl = "https://api.wakelet.com/collections/" + cardUrl + "/cards";
}
String subCardName = "card-" + subCard;
try
{
BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(
new FileOutputStream(cardDestinationFile),
"UTF-8"));
writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
writer.write("<!DOCTYPE html\n");
writer.write(" PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n");
writer.write(" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n");
writer.write("<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"de\" lang=\"de\">\n");
writer.write(" <head>\n");
writer.write(" <meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\"/>\n");
writer.write(" <!-- This file was created by wakelet_downloader_1, which is free software licensed under the GNU Affero General Public License 3 or any later version (see https://gitlab.com/publishing-systems/clients/ and http://www.publishing-systems.org). -->\n");
writer.write(" <title>Wakelet Card</title>\n");
writer.write(" </head>\n");
writer.write(" <body>\n");
writer.write(" <div>\n");
while (eventReader.hasNext() == true)
{
event = eventReader.nextEvent();
if (event.isCharacters() == true)
{
// This de-escapes the XML special characters from the
// XHTML payload as obtained from JSON.
writer.write(event.asCharacters().getData());
}
else if (event.isEndElement() == true)
{
tagName = event.asEndElement().getName().getLocalPart();
attemptRetrieval(cardUrl, subCardName, programPath, tempDirectory);
if (tagName.equals("description") == true)
{
break;
}
}
}
File cardJsonFile = new File(tempDirectory.getAbsolutePath() + File.separator + "resource_" + subCardName);
File cardJsonXmlFile = new File(tempDirectory.getAbsolutePath() + File.separator + subCardName + ".jsonxml");
writer.write(" </div>\n");
writer.write(" </body>\n");
writer.write("</html>\n");
jsonToXml(cardJsonFile, programPath, tempDirectory, cardJsonXmlFile);
writer.flush();
writer.close();
}
catch (FileNotFoundException ex)
{
System.out.println("wakelet_downloader_1: An error occurred while writing output file \"" + cardDestinationFile.getAbsolutePath() + "\".");
System.exit(1);
}
catch (UnsupportedEncodingException ex)
{
System.out.println("wakelet_downloader_1: An error occurred while writing output file \"" + cardDestinationFile.getAbsolutePath() + "\".");
System.exit(1);
}
catch (IOException ex)
{
System.out.println("wakelet_downloader_1: An error occurred while writing output file \"" + cardDestinationFile.getAbsolutePath() + "\".");
System.exit(1);
}
File cardXmlFile = new File(outputDirectory.getAbsolutePath() + File.separator + subCardName + ".xml");
++cardCount;
}
else if (tagName.equals("wake") == true)
{
Attribute attributeUrl = event.asStartElement().getAttributeByName(new QName("url"));
xmlTransform(cardJsonXmlFile, programPath, tempDirectory, cardsJsonXmlPreparationStylesheet, cardXmlFile);
if (attributeUrl == null)
{
continue;
}
cardSources.add(new CardSource(subCard, cardXmlFile));
String subWakeUrl = attributeUrl.getValue().split("\\?")[0];
++subCard;
if (subWakeUrls.contains(subWakeUrl) != true)
{
subWakeUrls.add(subWakeUrl);
}
}
}
}
}
catch (XMLStreamException ex)
{
System.out.println("wakelet_downloader_1: An error occurred while reading \"" + cardsXmlFile.getAbsolutePath() + "\".");
System.out.println("wakelet_downloader_1: An error occurred while reading \"" + wakeXmlFile.getAbsolutePath() + "\".");
System.exit(1);
}
catch (SecurityException ex)
{
System.out.println("wakelet_downloader_1: An error occurred while reading \"" + cardsXmlFile.getAbsolutePath() + "\".");
System.out.println("wakelet_downloader_1: An error occurred while reading \"" + wakeXmlFile.getAbsolutePath() + "\".");
System.exit(1);
}
catch (IOException ex)
{
System.out.println("wakelet_downloader_1: An error occurred while reading \"" + cardsXmlFile.getAbsolutePath() + "\".");
System.out.println("wakelet_downloader_1: An error occurred while reading \"" + wakeXmlFile.getAbsolutePath() + "\".");
System.exit(1);
}
for (CardSource cardSource : cardSources)
int subWakeCount = 1;
for (String subWakeUrl : subWakeUrls)
{
File cardDestionationDirectory = new File(outputDirectory.getAbsolutePath() + File.separator + "card-" + cardSource.GetId());
int result = attemptDownload(subWakeUrl, outputDirectory.getAbsolutePath(), tempDirectory.getAbsolutePath(), subWakeCount);
if (cardDestionationDirectory.exists() == true)
{
if (cardDestionationDirectory.isDirectory() == true)
{
if (cardDestionationDirectory.canWrite() != true)
{
System.out.println("wakelet_downloader_1: Output directory \"" + cardDestionationDirectory.getAbsolutePath() + "\" isn't writable.");
System.exit(1);
}
}
else
{
System.out.println("wakelet_downloader_1: Output path \"" + cardDestionationDirectory.getAbsolutePath() + "\" isn't a directory.");
System.exit(1);
}
}
else
if (result != 0)
{
try
{
cardDestionationDirectory.mkdirs();
}
catch (SecurityException ex)
{
System.out.println("wakelet_downloader_1: Can't create output directory \"" + cardDestionationDirectory.getAbsolutePath() + "\".");
System.exit(1);
}
return result;
}
int noteCount = 1;
try
{
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
InputStream in = new FileInputStream(cardSource.GetCardFile());
XMLEventReader eventReader = inputFactory.createXMLEventReader(in);
while (eventReader.hasNext() == true)
{
XMLEvent event = eventReader.nextEvent();
if (event.isStartElement() == true)
{
String tagName = event.asStartElement().getName().getLocalPart();
if (tagName.equals("description") == true)
{
File cardDestionationFile = new File(cardDestionationDirectory.getAbsolutePath() + File.separator + "note-" + noteCount + ".xhtml");
if (cardDestionationFile.exists() == true)
{
if (cardDestionationFile.isFile() == true)
{
boolean deleteSuccessful = false;
try
{
deleteSuccessful = cardDestionationFile.delete();
}
catch (SecurityException ex)
{
}
if (deleteSuccessful != true)
{
if (cardDestionationFile.canWrite() != true)
{
System.out.println("wakelet_downloader_1: Can't overwrite output file \"" + cardDestionationFile.getAbsolutePath() + "\".");
System.exit(1);
}
}
}
else
{
System.out.println("wakelet_downloader_1: Output path \"" + cardDestionationFile.getAbsolutePath() + "\" isn't a file.");
System.exit(1);
}
}
try
{
BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(
new FileOutputStream(cardDestionationFile),
"UTF-8"));
writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
writer.write("<!DOCTYPE html\n");
writer.write(" PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n");
writer.write(" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n");
writer.write("<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"de\" lang=\"de\">\n");
writer.write(" <head>\n");
writer.write(" <meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\"/>\n");
writer.write(" <!-- This file was created by wakelet_downloader_1, which is free software licensed under the GNU Affero General Public License 3 or any later version (see https://gitlab.com/publishing-systems/clients/ and http://www.publishing-systems.org). -->\n");
writer.write(" <title>Wakelet Note</title>\n");
writer.write(" </head>\n");
writer.write(" <body>\n");
writer.write(" <div>\n");
while (eventReader.hasNext() == true)
{
event = eventReader.nextEvent();
if (event.isCharacters() == true)
{
// This de-escapes the XML special characters from the
// XHTML payload as obtained from JSON.
writer.write(event.asCharacters().getData());
}
else if (event.isEndElement() == true)
{
tagName = event.asEndElement().getName().getLocalPart();
if (tagName.equals("description") == true)
{
break;
}
}
}
writer.write(" </div>\n");
writer.write(" </body>\n");
writer.write("</html>\n");
writer.flush();
writer.close();
}
catch (FileNotFoundException ex)
{
System.out.println("wakelet_downloader_1: An error occurred while writing output file \"" + cardDestionationFile.getAbsolutePath() + "\".");
System.exit(1);
}
catch (UnsupportedEncodingException ex)
{
System.out.println("wakelet_downloader_1: An error occurred while writing output file \"" + cardDestionationFile.getAbsolutePath() + "\".");
System.exit(1);