Commit 91cd6cea authored by Matthew Brunelle

initial backup

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Serverless directories
.serverless
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
from label_image import load_graph, read_tensor_from_image_file, load_labels
import numpy as np
import tensorflow as tf
import requests
import os
class Classifier:
"""
Wraps the Tensorflow label_image script:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/label_image.py
Loads the Mobile Soup classifier
"""
input_name = "import/" + "Placeholder"
output_name = "import/" + "final_result"
# size = 299 # size for ImageNet classifier
size = 224 # size for Mobile Soup
def __init__(self, graph_name="resources/output_graph_mobile.pb"):
"""
Initializes the tensorflow session and loads the classifier graph.
:param graph_name: the graph name can be overridden to use a different model
"""
self.graph = load_graph(graph_name)
self.input_operation = self.graph.get_operation_by_name(self.input_name)
self.output_operation = self.graph.get_operation_by_name(self.output_name)
self.session = tf.Session(graph=self.graph)
self.labels = load_labels("resources/output_labels.txt")
if not os.path.exists("/tmp/photos"):
os.makedirs("/tmp/photos")
def __enter__(self):
"""
Provided to allow using the classifier in a with statement.
"""
return self
def __exit__(self, exc_type, exc_value, traceback):
"""
Provided to allow using the classifier in a with statement. Ensures the session is closed.
"""
self.session.close()
def classify(self, image):
"""
Downloads the image from the scraped url if it has not been cached yet (e.g. after the Lambda container is recycled).
Reads the image file and runs the classifier.
:param image: the image to classify
:return: None; the result is stored on image.soup_confidence
"""
print(f"\tclassifying image {image.file_name()}")
if not os.path.exists(image.file_name()):
print(f"\tImage not saved, downloading. {image.file_name()}")
r = requests.get(image.url, timeout=2.0)
r.raise_for_status()  # fail fast rather than classifying a file that never downloaded
with open(image.file_name(), 'wb') as f:
f.write(r.content)
t = read_tensor_from_image_file(
image.file_name(),
input_height=self.size,
input_width=self.size,
input_mean=0,
input_std=255)
results = self.session.run(self.output_operation.outputs[0], {
self.input_operation.outputs[0]: t
})
results = np.squeeze(results)
soup_confidence = results[self.labels.index("soup")].astype(float)
print(f"\tfinished classifying image with soup confidence {soup_confidence}: {image}")
image.soup_confidence = soup_confidence
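# A minimal usage sketch (illustrative addition, not part of the commit):
# exercises the with-statement support above. Assumes entities.Image is
# importable and that the url points at a downloadable jpg.
if __name__ == "__main__":
    from entities import Image

    demo = Image(url="https://example.com/photo.jpg", post_date=0)
    with Classifier() as c:
        c.classify(demo)  # downloads the file and sets demo.soup_confidence
    print(demo.soup_confidence)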
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
"""
Models used for storing the image classifications.
"""
Base = declarative_base()
class Image(Base):
"""
Holds the image data from a scraped post and the classification from TensorFlow
"""
__tablename__ = 'images'
url = Column(String, primary_key=True)
post_date = Column(Integer)
soup_confidence = Column(Float)
posted = Column(Integer, default=0)
def file_name(self):
return "/tmp/" + self.url.split('/')[-1]
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import numpy as np
import tensorflow as tf
def load_graph(model_file):
graph = tf.Graph()
graph_def = tf.GraphDef()
with open(model_file, "rb") as f:
graph_def.ParseFromString(f.read())
with graph.as_default():
tf.import_graph_def(graph_def)
return graph
def read_tensor_from_image_file(file_name,
input_height=299,
input_width=299,
input_mean=0,
input_std=255):
input_name = "file_reader"
output_name = "normalized"
file_reader = tf.read_file(file_name, input_name)
if file_name.endswith(".png"):
image_reader = tf.image.decode_png(
file_reader, channels=3, name="png_reader")
elif file_name.endswith(".gif"):
image_reader = tf.squeeze(
tf.image.decode_gif(file_reader, name="gif_reader"))
elif file_name.endswith(".bmp"):
image_reader = tf.image.decode_bmp(file_reader, name="bmp_reader")
else:
image_reader = tf.image.decode_jpeg(
file_reader, channels=3, name="jpeg_reader")
float_caster = tf.cast(image_reader, tf.float32)
dims_expander = tf.expand_dims(float_caster, 0)
resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
sess = tf.Session()
result = sess.run(normalized)
return result
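# Added observation (not upstream text): read_tensor_from_image_file builds
# fresh graph ops and a new Session on every call, so calling it repeatedly
# in a long-lived process grows the default graph and memory.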
def load_labels(label_file):
label = []
proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()
for l in proto_as_ascii_lines:
label.append(l.rstrip())
return label
if __name__ == "__main__":
file_name = "tensorflow/examples/label_image/data/grace_hopper.jpg"
model_file = \
"tensorflow/examples/label_image/data/inception_v3_2016_08_28_frozen.pb"
label_file = "tensorflow/examples/label_image/data/imagenet_slim_labels.txt"
input_height = 299
input_width = 299
input_mean = 0
input_std = 255
input_layer = "input"
output_layer = "InceptionV3/Predictions/Reshape_1"
parser = argparse.ArgumentParser()
parser.add_argument("--image", help="image to be processed")
parser.add_argument("--graph", help="graph/model to be executed")
parser.add_argument("--labels", help="name of file containing labels")
parser.add_argument("--input_height", type=int, help="input height")
parser.add_argument("--input_width", type=int, help="input width")
parser.add_argument("--input_mean", type=int, help="input mean")
parser.add_argument("--input_std", type=int, help="input std")
parser.add_argument("--input_layer", help="name of input layer")
parser.add_argument("--output_layer", help="name of output layer")
args = parser.parse_args()
if args.graph:
model_file = args.graph
if args.image:
file_name = args.image
if args.labels:
label_file = args.labels
if args.input_height:
input_height = args.input_height
if args.input_width:
input_width = args.input_width
if args.input_mean:
input_mean = args.input_mean
if args.input_std:
input_std = args.input_std
if args.input_layer:
input_layer = args.input_layer
if args.output_layer:
output_layer = args.output_layer
graph = load_graph(model_file)
t = read_tensor_from_image_file(
file_name,
input_height=input_height,
input_width=input_width,
input_mean=input_mean,
input_std=input_std)
input_name = "import/" + input_layer
output_name = "import/" + output_layer
input_operation = graph.get_operation_by_name(input_name)
output_operation = graph.get_operation_by_name(output_name)
with tf.Session(graph=graph) as sess:
results = sess.run(output_operation.outputs[0], {
input_operation.outputs[0]: t
})
results = np.squeeze(results)
top_k = results.argsort()[-5:][::-1]
labels = load_labels(label_file)
for i in top_k:
print(labels[i], results[i])
from slacker import Slacker
import os
class Messenger:
def __init__(self):
# TODO: handle token better
print("configuring messenger")
self.SLACK_BOT_TOKEN = os.environ["SLACK_BOT_TOKEN"]
self.slack = Slacker(self.SLACK_BOT_TOKEN)
print("configured messenger")
def post_message_to_channel(self, image):
message = "I am {:.2%} sure this is the newest soup board: {}".format(image.soup_confidence, image.url)
print(f"posting message to channel:\n\t {message}")
self.slack.chat.post_message('#soup-bot', message, username="soup-bot")
image.posted = 1
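# Environment sketch (added; the scope requirement is an assumption):
#
#   export SLACK_BOT_TOKEN=xoxb-...   # bot token permitted to post to #soup-bot
#
# With that set, Messenger().post_message_to_channel(image) formats the
# confidence as a percentage, posts to #soup-bot, and marks the image posted.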
import os
from instalooter.looters import ProfileLooter
import classifier
from entities import *
from datetime import datetime
from queries import Queries
photo_path = "C://Users//mbrunelle//soup//photos"
os.makedirs(photo_path, exist_ok=True)
os.makedirs(photo_path + "/probably_not_soup", exist_ok=True)
os.makedirs(photo_path + "/probably_soup", exist_ok=True)
queries = Queries()
print("configuring looter")
# https://github.com/althonos/InstaLooter/issues/173
looter = ProfileLooter("davesfreshpasta")
print("configured looter")
start_time = datetime.now()
last_date = queries.oldest_image_date()
timeframe = (start_time.date(), last_date)
posts = looter.medias(timeframe)
# https://github.com/althonos/InstaLooter/issues/171
try:
with classifier.Classifier() as c:
for post in posts:
url = post['display_url']
if queries.image_exists(url):
print(f"\talready classified, skipping {url}")
continue
timestamp = post['taken_at_timestamp']
print(url)
image = Image(url=url, post_date=timestamp)
c.classify(image)
queries.add(image)
destination = photo_path + "/probably_not_soup/"
if image.soup_confidence >= 0.7:
destination = photo_path + "/probably_soup/"
# strip the directory components to get the bare file name
file_name = image.file_name().split("/")[-1]
print("\t" + destination + file_name)
try:
os.rename(image.file_name(), destination + file_name)
except FileExistsError:
print(f"\talready exists, skipping")
queries.commit()
except RuntimeError as err:
print(err)
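# Added note: this local backfill pass sorts downloads into probably_soup /
# probably_not_soup at a 0.7 confidence threshold for manual review.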
import os
from sqlalchemy.orm import *
from entities import *
from datetime import datetime, timedelta
class Queries:
"""
Contains the database queries
"""
def __init__(self):
"""
Based on environment variables, connects to either PostgreSQL on RDS or a local SQLite instance.
Creates all schema if it has not been initialized yet.
"""
print("Initializing query engine.")
if os.environ.get("ENV", None) == "AWS":
print("Connecting to postgres database on AWS")
rds_user = os.environ.get("RDS_USER")
rds_password = os.environ.get("RDS_PASSWORD")
rds_port = os.environ.get("RDS_PORT", 5432)
rds_host = os.environ.get("RDS_HOST")
url = f'postgresql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/soup'
self.engine = create_engine(url)
print("Connected to postgres database on AWS")
# Do I actually... miss DI?
else:
print("Connecting to local sqlite database")
self.engine = create_engine('sqlite:///resources/soup.db')
print("Connecting to local sqlite database")
print("Creating Schema if necessary.")
Base.metadata.create_all(self.engine)
print("Creating session")
Session = sessionmaker(bind=self.engine)
self.session = Session()
print("Finished initializing query engine")
def image_exists(self, url):
"""
Indicates whether an image already exists for the given url.
:param url: the url to check
:return:
"""
return self.session.query(exists().where(Image.url == url)).scalar()
def top_soup(self, confidence=0.8):
"""
Finds the most recent classification that is likely soup.
:return:
"""
return self.session.query(Image)\
.filter(Image.soup_confidence >= confidence)\
.order_by(desc(Image.post_date))\
.first()
def most_recent_image_date(self):
"""
Finds the date of the most recent image.
:return:
"""
image = self.session.query(Image)\
.order_by(desc(Image.post_date))\
.first()
if not image:
return datetime.now().date() - timedelta(days=5)
return datetime.fromtimestamp(image.post_date).date()
def oldest_image_date(self):
"""
Finds the date of the oldest image.
:return:
"""
image = self.session.query(Image)\
.order_by(asc(Image.post_date))\
.first()
if not image:
return datetime.now().date() - timedelta(days=5)
return datetime.fromtimestamp(image.post_date).date()
def add(self, entry):
"""
I should likely refactor this.
:param entry: the entry to add
:return:
"""
self.session.add(entry)
# TODO consider making queries managed with a with statement and auto commit
def commit(self):
"""
I should likely refactor this.
:return:
"""
self.session.commit()
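# Connection sketch (added; values are placeholders): with ENV=AWS the engine
# URL is assembled as
#   postgresql://<RDS_USER>:<RDS_PASSWORD>@<RDS_HOST>:<RDS_PORT>/soup
# so a deployment exports ENV, RDS_USER, RDS_PASSWORD, RDS_HOST and optionally
# RDS_PORT; otherwise everything falls back to sqlite:///resources/soup.db.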
# Welcome to Serverless!
#
# This file is the main config file for your service.
# It's very minimal at this point and uses default values.
# You can always add more config options for more control.
# We've included some commented out config examples here.
# Just uncomment any of them to get that config option.
#
# For full config options, check the docs:
# docs.serverless.com
#
# Happy Coding!
service: super-soup
# You can pin your service to only deploy with a specific Serverless version
# Check out our docs for more details
# frameworkVersion: "=X.X.X"
provider:
name: aws
runtime: python3.6
region: us-east-1
iamRoleStatements:
- Effect: "Allow"
Action:
- rds:*
Resource: "arn:aws:rds:us-east-1:698392743170:db:super-soup"
# you can overwrite defaults here
# stage: dev
# region: us-east-1
# you can add statements to the Lambda function's IAM Role here
# iamRoleStatements:
# - Effect: "Allow"
# Action:
# - "s3:ListBucket"
# Resource: { "Fn::Join" : ["", ["arn:aws:s3:::", { "Ref" : "ServerlessDeploymentBucket" } ] ] }
# - Effect: "Allow"
# Action:
# - "s3:PutObject"
# Resource:
# Fn::Join:
# - ""
# - - "arn:aws:s3:::"
# - "Ref" : "ServerlessDeploymentBucket"
# - "/*"
# you can define service wide environment variables here
# environment:
# variable1: value1
# you can add packaging information here
#package:
# include:
# - include-me.py
# - include-me-dir/**
# exclude:
# - exclude-me.py
# - exclude-me-dir/**
functions:
hello:
handler: tracker.update
vpc:
securityGroupIds:
- sg-22c4d06b
subnetIds:
- subnet-888e31ff
- subnet-5bb94270
- subnet-92409ecb
- subnet-627ae058
events:
- schedule:
rate: cron(0/15 15 ? * MON-FRI *)
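# fires every 15 minutes during the 15:00 UTC hour, Monday through Friday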
# The following are a few example events you can configure
# NOTE: Please make sure to change your handler code to work with those events
# Check the event documentation for details
# events:
# - http:
# path: users/create
# method: get
# - s3: ${env:BUCKET}
# - schedule: rate(10 minutes)
# - sns: greeter-topic
# - stream: arn:aws:dynamodb:region:XXXXXX:table/foo/stream/1970-01-01T00:00:00.000
# - alexaSkill
# - alexaSmartHome: amzn1.ask.skill.xx-xx-xx-xx
# - iot:
# sql: "SELECT * FROM 'some_topic'"
# - cloudwatchEvent:
# event:
# source:
# - "aws.ec2"
# detail-type:
# - "EC2 Instance State-change Notification"
# detail:
# state:
# - pending
# - cloudwatchLog: '/aws/lambda/hello'
# - cognitoUserPool:
# pool: MyUserPool
# trigger: PreSignUp
# Define function environment variables here
# environment:
# variable2: value2
# you can add CloudFormation resource templates here
#resources:
# Resources:
# NewResource:
# Type: AWS::S3::Bucket
# Properties:
# BucketName: my-new-bucket
# Outputs:
# NewOutput:
# Description: "Description for the output"
# Value: "Some output value"
plugins:
- serverless-python-requirements
custom:
pythonRequirements:
dockerizePip: true
zip: true
package:
exclude:
- 'resources/output_graph.pb'
- 'resources/soup.db'
try:
import unzip_requirements
except ImportError:
pass
import os
import classifier
from datetime import datetime
from instalooter.looters import ProfileLooter
from queries import Queries
from messenger import Messenger
from entities import *
import fs
queries = Queries()
def classify_posts(posts):
with classifier.Classifier() as c:
for post in posts:
timestamp = post['taken_at_timestamp']
picture_url = post['display_url']
print(f"checking url {picture_url}")
if queries.image_exists(picture_url):
print(f"\talready processed. skipping {picture_url}")
continue
print(f"\timage not loaded, adding to db {picture_url}")
image = Image(url=picture_url, post_date=timestamp)
print(f"\tclassifying image {picture_url}")
c.classify(image)
queries.add(image)
queries.commit()
def update(event, context):
start_time = datetime.now()
print("Starting soup")
queries = Queries()
print("configuring looter")
# https://github.com/althonos/InstaLooter/issues/173
ProfileLooter._cachefs = fs.open_fs("osfs:///tmp/")
looter = ProfileLooter("davesfreshpasta")
print("configured looter")
print("finished setup")
last_date = queries.most_recent_image_date()
timeframe = (start_time.date(), last_date)
print(f"last post date: {last_date}. Timeframe is {timeframe}")
posts = looter.medias(timeframe=timeframe)
if posts:
classify_posts(posts)
# environment variables arrive as strings, so coerce them before use
repost_soup = os.environ.get("REPOST_SOUP", "false").lower() == "true"
confidence = float(os.environ.get("CONFIDENCE", 0.8))
# TODO: move posting into post iteration loop
top_image = queries.top_soup(confidence)
if top_image and ((not top_image.posted) or repost_soup):
messenger = Messenger()
messenger.post_message_to_channel(top_image)
queries.commit() # messenger will mark image as posted
elapsed_time = datetime.now() - start_time
print(f"Finished soup. Elapsed time {elapsed_time}")
if __name__ == '__main__':
update(None, None)
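# Deployment note (added): serverless.yml registers this module as function
# "hello" with handler tracker.update; the __main__ guard above runs the same
# update once locally with an empty event and context.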