Commit 598216cb authored by Dave Morriss's avatar Dave Morriss

Added collect_apod_simple

parent d8cce2cd
# Ignore vim backup and swap files
*~
*.swp
# LibreOffice lock files
.~lock*
# Ignore the Wiki stuff, it's independant
hprmisc-gitorious-wiki/
#!/usr/bin/env perl
#===============================================================================
#
# FILE: collect_apod_simple
#
# USAGE: ./collect_apod_simple [YYMMDD]
#
# DESCRIPTION: Downloads the current Astronomy Picture of the Day or that
# relating to the formatted date provided as an argument. In
# this context "current" can mean two URLs: .../astropix.html or
# .../apYYMMDD.html. We now *do not* download the
# .../astropix.html version since it has a different HTML
# layout.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: Based on 'collect_apod' but without the Image::Magick stuff,
# for simplicity and for release to the HPR community
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.1
# CREATED: 2015-01-02 19:58:01
# REVISION: 2015-01-02 21:09:06
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use LWP;
use LWP::UserAgent;
use DateTime;
use HTML::TreeBuilder 5 -weak;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.1';
#
# Set to 0 to be more silent
#
my $DEBUG = 1;
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Edit this to your needs
#-------------------------------------------------------------------------------
#
# Where the script will download the picture. Edit this to where you want
#
my $image_base = "$ENV{HOME}/Backgrounds/apod";
#-------------------------------------------------------------------------------
# Nothing needs editing below here
#-------------------------------------------------------------------------------
#
# Get the argument or default it
#
my $arg = shift;
unless ( defined($arg) ) {
#
# APOD wants a date in YYMMDD format
#
my $dt = DateTime->now;
$arg = sprintf( "%02i%02i%02i",
substr( $dt->year, -2 ),
$dt->month, $dt->day );
}
#
# Check the argument is a valid date in YYMMDD format
#
die "Usage: $PROG [YYMMDD]\n" unless ($arg =~ /^\d{6}$/);
#
# Make an URL depending on the argument
#
my $apod_base = "http://apod.nasa.gov/apod";
my $apod_URL = "$apod_base/ap$arg.html";
#
# General declarations
#
my ( $image_URL, $image_file );
my ( $tree, $title );
my ( $url, $element, $attr, $tag );
#
# Enable Unicode mode
#
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";
if ($DEBUG) {
print "Base URL: $apod_base\n";
print "APOD URL: $apod_URL\n";
print "Image base: $image_base\n";
print "\n";
}
#
# Get the HTML page, pretending to be some unknown User Agent
#
my $ua = LWP::UserAgent->new;
$ua->agent("MyApp/0.1");
my $req = HTTP::Request->new( GET => $apod_URL );
my $res = $ua->request($req);
if ( $res->is_success ) {
print "GET request successful\n" if $DEBUG;
#
# Parse the HTML we got back
#
$tree = HTML::TreeBuilder->new;
$tree->parse_content( $res->content_ref );
#
# Get and display the title in debug mode
#
if ($DEBUG) {
if ( $title = $tree->look_down( _tag => 'title' ) ) {
$title = $title->as_trimmed_text();
print "Found title: $title\n" if $title;
}
}
#
# Look for the image. This is expected to be the href attribute of an <a>
# tag. The image we see on the page is merely a link to this (usually)
# larger image.
#
for ( @{ $tree->extract_links('a') } ) {
( $url, $element, $attr, $tag ) = @$_;
if ($DEBUG) {
print "Found: $url\n" if $url;
}
last unless defined($url);
last if ( $url =~ /\.(jpg|png)$/i );
}
#
# Abort if no image (it might be a video or a GIF)
#
die "Image URL not found\n"
unless defined($url)
&& $url =~ /\.(jpg|png)$/i;
$image_URL = "$apod_base/$url";
#
# Extract the final part of the URL for the file name. We usually get
# a JPEG, sometimes with a shouty extension, which we change.
#
( $image_file = $image_URL ) =~ s|.*/||mx;
( $image_file = "$image_base/$image_file" ) =~ s/JPG$/jpg/mx;
if ($DEBUG) {
print "Image URL: $image_URL\n";
print "Image file: $image_file\n";
}
#
# Abort if the file already exists (the script already ran?)
#
die "File $image_file already exists\n" if ( -f $image_file );
#
# Set up the GET request for the image
#
$req = HTTP::Request->new( GET => $image_URL );
#
# Download the image to the (possibly renamed) image file
#
$res = $ua->request( $req, $image_file );
if ( $res->is_success ) {
print "Downloaded to $image_file\n" if $DEBUG;
}
else {
#
# The image download failed
#
die $res->status_line, " ($image_URL)\n";
}
}
else {
#
# We failed to get the web page
#
die $res->status_line, " ($apod_URL)\n";
}
exit;
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment