Commit 3a20a281 authored by Dave Morriss's avatar Dave Morriss

Added 'collect_apod'

Tidied up 'collect_apod_simple'
parent 598216cb
#!/usr/bin/env perl
#===============================================================================
#
# FILE: collect_apod
#
# USAGE: ./collect_apod [YYMMDD]
#
# DESCRIPTION: Downloads the current Astronomy Picture of the Day or that
# relating to the formatted date provided as an argument. In
# this context "current" can mean two URLs: .../astropix.html or
# .../apYYMMDD.html. We now *do not* download the
# .../astropix.html version since it has a different HTML
# layout.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: This version has been rewritten using HTML::TreeBuilder to
# parse the HTML, and the logic has been improved.
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.1.2
# CREATED: 2012-05-06 18:31:13
# REVISION: 2015-01-04 10:32:44
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use Carp;
use LWP::UserAgent;
use DateTime;
use HTML::TreeBuilder 5 -weak;
use Image::Magick;
use File::Temp;
#
# Version number (manually incremented)
#
our $VERSION = '0.1.2';
#
# Set to 0 to be more silent
#
my $DEBUG = 1;
#
# Script name
#
( my $PROG = $0 ) =~ s|.*/||mx;
#-------------------------------------------------------------------------------
# Edit this to your needs
#-------------------------------------------------------------------------------
#
# Where the script will download the picture. Edit this to where you want
#
my $image_base = "$ENV{HOME}/Backgrounds/apod";
#-------------------------------------------------------------------------------
# Nothing needs editing below here
#-------------------------------------------------------------------------------
#
# Get the argument or default it
#
my $arg = shift;
unless ( defined($arg) ) {
#
# APOD wants a date in YYMMDD format
#
my $dt = DateTime->now;
$arg = sprintf( "%02i%02i%02i",
substr( $dt->year, -2 ),
$dt->month, $dt->day );
}
#
# Make an URL depending on the argument
#
my $apod_base = "http://apod.nasa.gov/apod";
my $apod_URL = "$apod_base/ap$arg.html";
#
# General declarations
#
my ( $image_URL, $image_file, $newfile );
my ( $tree, $title, $add_title, $fh, $fname );
my ( $url, $element, $attr, $tag );
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
if ($DEBUG) {
print "Base URL: $apod_base\n";
print "APOD URL: $apod_URL\n";
print "Image base: $image_base\n";
print "\n";
}
#
# Get the page, pretending to be some unknown User Agent
#
my $ua = LWP::UserAgent->new;
$ua->agent("MyApp/0.1");
my $req = HTTP::Request->new( GET => $apod_URL );
my $res = $ua->request($req);
if ( $res->is_success ) {
print "GET request successful\n" if $DEBUG;
#
# Parse the HTML we got back
#
$tree = HTML::TreeBuilder->new;
$tree->parse_content( $res->content_ref );
#
# Get the title
#
if ( $title = $tree->look_down( _tag => 'title' ) ) {
$title = $title->as_trimmed_text();
if ($DEBUG) {
print "Found title: $title\n" if $title;
}
$add_title = defined($title) && length($title) > 0;
}
else {
$title = "No title";
$add_title = 0;
}
#
# Look for the image. This is expected to be the href attribute of an <a>
# tag. The image we see on the page is merely a link to this (usually)
# larger image.
#
for ( @{ $tree->extract_links('a') } ) {
( $url, $element, $attr, $tag ) = @$_;
if ($DEBUG) {
print "Found: $url\n" if $url;
}
last unless defined($url);
last if ( $url =~ /\.(jpg|png)$/i );
}
#
# Abort if no image (it might be a video or a GIF)
#
die "Image URL not found\n"
unless defined($url)
&& $url =~ /\.(jpg|png)$/i;
$image_URL = "$apod_base/$url";
#
# Extract the final part for the file name. We usually get a JPEG,
# sometimes with a shouty extension, which we change. The image we'll make
# with the title in it will be a PNG.
#
( $image_file = $image_URL ) =~ s|.*/||mx;
( $image_file = "$image_base/$image_file" ) =~ s/JPG$/jpg/mx;
( $newfile = $image_file ) =~ s/\.[^.]*$/.png/;
if ($DEBUG) {
print "Image URL: $image_URL\n";
print "Image file: $image_file\n";
print "Annotated file: $newfile\n";
}
#
# Abort if the file already exists (the script already ran?)
#
die "File $image_file already exists\n" if ( -f $image_file );
die "File $newfile already exists\n" if ( -f $newfile );
#
# Set up the temporary file (assuming there's a /tmp directory - this is
# UNIX isn't it?)
#
$fh = File::Temp->new( TEMPLATE => 'apod_image_XXXXXX', DIR => '/tmp' );
$fname = $fh->filename;
#
# Set up the GET request for the image
#
$req = HTTP::Request->new( GET => $image_URL );
#
# Download the image to the temporary file
#
$res = $ua->request( $req, $fname );
if ( $res->is_success ) {
print "Downloaded to $fname\n" if $DEBUG;
#
# Prepare for the annotation of the file
#
my $image = Image::Magick->new;
#
# Read the temporary file
#
my $imres = $image->Read($fname);
carp "$imres" if $imres;
#
# Apply the title if there is one
#
if ($add_title) {
$imres = $image->Annotate(
font => 'kai.ttf',
pointsize => 20,
fill => 'white',
undercolor => 'black',
text => $title,
gravity => 'North'
);
carp "$imres" if $imres;
}
#
# Write the result to the transformed image file name
#
$imres = $image->Write($newfile);
carp "$imres" if $imres;
#
# Report the output file - if it exists
#
if ( -e $newfile ) {
#
# The null string here is for the Perl compiler which can't seem to
# parse this without it
#
print "", ( $add_title ? "Annotated" : "Un-annotated" ),
" image in $newfile\n";
}
else {
print "Unable to write $newfile\n";
}
}
else {
#
# The image download failed
#
die $res->status_line, " ($image_URL)\n";
}
}
else {
#
# We failed to get the web page
#
die $res->status_line, " ($apod_URL)\n";
}
exit;
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
......@@ -20,7 +20,7 @@
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.1
# CREATED: 2015-01-02 19:58:01
# REVISION: 2015-01-02 21:09:06
# REVISION: 2015-01-03 23:00:27
#
#===============================================================================
......@@ -29,7 +29,6 @@ use strict;
use warnings;
use utf8;
use LWP;
use LWP::UserAgent;
use DateTime;
use HTML::TreeBuilder 5 -weak;
......@@ -45,11 +44,9 @@ our $VERSION = '0.0.1';
my $DEBUG = 1;
#
# Script and directory names
# Script name
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Edit this to your needs
......@@ -80,7 +77,7 @@ unless ( defined($arg) ) {
#
# Check the argument is a valid date in YYMMDD format
#
die "Usage: $PROG [YYMMDD]\n" unless ($arg =~ /^\d{6}$/);
die "Usage: $PROG [YYMMDD]\n" unless ( $arg =~ /^\d{6}$/ );
#
# Make an URL depending on the argument
......@@ -98,8 +95,8 @@ my ( $url, $element, $attr, $tag );
#
# Enable Unicode mode
#
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
if ($DEBUG) {
print "Base URL: $apod_base\n";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment