Commit 3f7bd4ce authored by Lucas Moura

Imported Upstream version 0.6

parent 355dc3f5
......@@ -5,23 +5,26 @@ import datetime
from apprecommender.recommender import Recommender
from apprecommender.user import LocalSystem
from apprecommender.config import Config
class AppRecommender:
def __init__(self):
self.recommender = Recommender()
self.config = Config()
def make_recommendation(self, recommendation_size):
def make_recommendation(self):
begin_time = datetime.datetime.now()
logging.info("Computation started at %s" % begin_time)
# user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
# "desktopapps"))
user = LocalSystem()
recommendation_size = Config().num_recommendations
user_reccomendation = (self.recommender.get_recommendation(
user, recommendation_size))
logging.info("Recommending applications for user %s" % user.user_id)
logging.info(user_reccomendation)
print (user_reccomendation)
end_time = datetime.datetime.now()
logging.info("Computation completed at %s" % end_time)
......
......@@ -96,6 +96,8 @@ class Config(Singleton):
self.bm25_nl = 0.5
# user content profile size
self.profile_size = 10
self.num_recommendations = 8
self.because = False
# neighborhood size
self.k_neighbors = 50
# popcon profiling method: full, voted
......
......@@ -15,67 +15,40 @@ class LoadOptions(Singleton):
def load(self):
    """
    Parse the command line arguments and apply them to Config.

    The (option, value) pairs returned by getopt are stored in
    self.options. When the arguments cannot be parsed the error is
    logged, the usage text is shown and the process exits; -h/--help
    shows the usage text and exits as well.
    """
    config = Config()

    # In getopt a trailing ':' (short) or '=' (long) marks an option that
    # requires a value. Only the options handled by the loop below are
    # declared, so anything else is reported as bad syntax by getopt
    # instead of reaching the final assertion. '-b/--because' is a plain
    # flag and '-n/--num-recommendations' takes a value.
    short_options = 'hdvs:z:itbn:'
    long_options = ['help', 'debug', 'verbose', 'strategy=',
                    'profile_size=', 'init', 'train', 'because',
                    'num-recommendations=']

    try:
        opts, _ = getopt.getopt(sys.argv[1:], short_options, long_options)
    except getopt.GetoptError as error:
        config.set_logger()
        logging.error('Bad syntax: {}'.format(str(error)))
        self.usage()
        sys.exit()

    self.options = opts

    for o, p in opts:
        if o in ('-h', '--help'):
            self.usage()
            sys.exit()
        elif o in ('-d', '--debug'):
            config.debug = 1
        elif o in ('-v', '--verbose'):
            config.verbose = 1
        elif o in ('-s', '--strategy'):
            config.strategy = p
        elif o in ('-z', '--profile_size'):
            config.profile_size = int(p)
        elif o in ('-i', '--init'):
            # Only recorded in self.options; no Config value to set here.
            continue
        elif o in ('-t', '--train'):
            # Only recorded in self.options; no Config value to set here.
            continue
        elif o in ('-b', '--because'):
            config.because = True
        elif o in ('-n', '--num-recommendations'):
            config.num_recommendations = int(p)
        else:
            # Guards against an option being declared above without a
            # matching branch in this chain.
            assert False, "unhandled option"
......@@ -89,6 +62,10 @@ class LoadOptions(Singleton):
print " -i, --init Initialize AppRecommender data"
print " -t, --train Make training of AppRecommender" \
" machine learning"
print " -n, --num-recommendations Set the number of packages that" \
" will be recommended"
print " -b, --because Display user packages that" \
" generated a given recommendation"
print " -d, --debug Set logging level to debug"
print " -v, --verbose Set logging level to verbose"
print " -o, --output=PATH Path to file to save output"
......
......@@ -3,7 +3,6 @@ from os import makedirs
import apt
import pickle
import re
import xapian
import Stemmer
......@@ -122,8 +121,7 @@ class MachineLearningData():
return self.get_pkg_data(axi, pkg_name, 'XT')
def get_pkg_terms(self, cache, pkg_name):
description = cache[pkg_name].candidate.description.strip()
description = re.sub('[^a-zA-Z]', ' ', description)
description = self.get_pkg_description(cache, pkg_name)
tokens = description.lower().split()
stems = [self.stemmer.stemWord(token) for token in tokens
......@@ -134,6 +132,13 @@ class MachineLearningData():
# Return the ``section`` attribute of the ``cache`` entry for ``pkg_name``.
def get_pkg_section(self, cache, pkg_name):
return cache[pkg_name].section
def get_pkg_description(self, cache, pkg_name):
    """
    Return the stripped description of the candidate version of pkg_name.

    An empty string is returned when the package is not in ``cache`` or
    has no candidate version, so callers can always treat the result as
    text (for example ``description.lower().split()``).
    """
    if pkg_name not in cache:
        return ''

    candidate = cache[pkg_name].candidate
    if candidate is None:
        return ''

    return candidate.description.strip()
def get_debtags_name(self, file_path):
with open(file_path, 'r') as text:
debtags_name = [debtag.strip() for debtag in text]
......
......@@ -19,11 +19,17 @@ __license__ = """
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import apt
import heapq
import logging
import os
import xapian
import operator
import os
import strategy
import xapian
from collections import namedtuple
from fuzzywuzzy import fuzz
from operator import attrgetter
from apprecommender.config import Config
......@@ -32,29 +38,60 @@ class RecommendationResult:
"""
Class designed to describe a recommendation result: items and scores.
"""
def __init__(self, item_score, ranking=0, limit=0, user_profile=None):
    """
    Set initial parameters.

    item_score   -- mapping of recommended item to its score.
    ranking      -- optional ordering of the items; stored only when
                    truthy.
    limit        -- value handed to get_prediction() by __str__().
    user_profile -- optional iterable of package names; when given, the
                    description of each package is loaded through
                    fill_pkg_descriptions().
    """
    self.item_score = item_score
    self.size = len(item_score)
    self.limit = limit
    self.cache = apt.Cache()
    self.pkg_descriptions = {}

    if ranking:
        self.ranking = ranking

    if user_profile:
        self.fill_pkg_descriptions(user_profile)
def fill_pkg_descriptions(self, user_profile):
    """
    Store the lower-cased description of every package in user_profile.

    Results are written to self.pkg_descriptions, keyed by package name.
    Packages that are missing from self.cache, or that have no candidate
    version, are skipped instead of aborting with a KeyError or
    AttributeError.
    """
    for pkg in user_profile:
        if pkg not in self.cache:
            continue

        candidate = self.cache[pkg].candidate
        if candidate is None:
            continue

        self.pkg_descriptions[pkg] = candidate.description.lower()
def __str__(self):
    """
    String representation of the object.

    After a leading newline, one line is produced per predicted package:
    its 1-based position, the package name padded to 20 columns and the
    summary of its candidate version. When self.pkg_descriptions is not
    empty, each line is followed by the packages returned by
    get_because() for the recommended package's description.
    """
    lines = ['\n']

    for index, (pkg, _) in enumerate(self.get_prediction(self.limit), 1):
        candidate = self.cache[pkg].candidate
        lines.append('{}: {} \t {}\n'.format(
            index, pkg.ljust(20), candidate.summary))

        if self.pkg_descriptions:
            because_pkgs = self.get_because(candidate.description.lower())
            lines.append(' because you installed: \t {}\n\n'.format(
                ', '.join(because_pkgs)))

    return ''.join(lines)
def get_because(self, rec_description):
    """
    Return up to four profile packages most similar to rec_description.

    Every description stored in self.pkg_descriptions is compared with
    rec_description through fuzz.ratio(); the names of the packages with
    the largest ratios are returned, largest first.
    """
    PkgRatio = namedtuple('PkgRatio', ['pkg', 'ratio'])

    scored = [PkgRatio(name, fuzz.ratio(rec_description, text))
              for name, text in self.pkg_descriptions.iteritems()]

    best = heapq.nlargest(4, scored, key=attrgetter('ratio'))
    return [entry.pkg for entry in best]
def get_prediction(self, limit=0):
"""
......
......@@ -26,6 +26,7 @@ import logging
import operator
import os
import pickle
import re
import recommender
import xapian
......@@ -70,11 +71,12 @@ class ContentBased(RecommendationStrategy):
self.profile_size = profile_size
def get_sugestion_from_profile(self, rec, user, profile,
recommendation_size):
recommendation_size, because=True):
query = xapian.Query(xapian.Query.OP_OR, profile)
enquire = xapian.Enquire(rec.items_repository)
enquire.set_weighting_scheme(rec.weight)
enquire.set_query(query)
user_profile = None
# Retrieve matching packages
try:
mset = enquire.get_mset(0, recommendation_size, None,
......@@ -89,7 +91,11 @@ class ContentBased(RecommendationStrategy):
item_score[m.document.get_data()] = m.weight
ranking.append(m.document.get_data())
result = recommender.RecommendationResult(item_score, ranking)
if because and Config().because:
user_profile = user.pkg_profile
result = recommender.RecommendationResult(
item_score, ranking, user_profile=user_profile)
return result
def run(self, rec, user, rec_size):
......@@ -412,12 +418,17 @@ class MachineLearning(ContentBased):
profile = user.content_profile(rec.items_repository, self.content,
self.suggestion_size, rec.valid_tags)
content_based = self.get_sugestion_from_profile(rec, user,
profile,
self.suggestion_size)
content_based = self.get_sugestion_from_profile(
rec, user, profile, self.suggestion_size, because=False)
pkgs, pkgs_score = [], {}
for pkg_line in str(content_based).splitlines()[1:]:
    # Each line is expected to look like "<position>: <package> ...".
    # Debian package names may contain '.' and '+' besides letters,
    # digits and '-', so those are accepted as part of the name.
    pkg_match = re.search(r'\d+:\s([\w.+-]+)', pkg_line)
    if pkg_match is None:
        # re.search() returns None for lines that do not follow the
        # expected layout; skip them instead of failing on .groups().
        continue

    pkg = pkg_match.group(1)
    pkg_score = int(pkg_line.split(':')[0].strip())
    pkgs.append(pkg)
......@@ -435,7 +446,6 @@ class MachineLearning(ContentBased):
kwargs['ml_strategy'] = ml_strategy
for pkg in pkgs:
if pkg not in self.cache:
continue
......@@ -496,6 +506,8 @@ class MachineLearning(ContentBased):
raise NotImplementedError("Method not implemented.")
def run(self, rec, user, rec_size):
user_profile = None
terms_name, debtags_name = self.load_terms_and_debtags()
pkgs, pkgs_score = self.get_pkgs_and_scores(rec, user)
......@@ -504,9 +516,12 @@ class MachineLearning(ContentBased):
debtags_name)
item_score = self.get_item_score(pkgs_score, pkgs_classifications)
result = recommender.RecommendationResult(item_score, limit=rec_size)
return result
if Config().because:
user_profile = user.pkg_profile
return recommender.RecommendationResult(
item_score, limit=rec_size, user_profile=user_profile)
class MachineLearningBVA(MachineLearning):
......
......@@ -4,6 +4,8 @@ import apt
import unittest
import xapian
from mock import patch
from apprecommender.ml.data import MachineLearningData
......@@ -28,19 +30,12 @@ class PkgClassificationTests(unittest.TestCase):
for debtag in vim_debtags:
self.assertTrue(debtag in vim_debtags_result)
def test_get_pkg_terms(self):
vim_terms = [u'vim', u'compat', u'version', u'unix', u'editor', u'vi',
u'new', u'featur', u'ad', u'multi', u'level', u'undo',
u'syntax', u'highlight', u'command', u'line', u'histori',
u'line', u'help', u'filenam', u'complet', u'block',
u'oper', u'fold', u'unicod', u'support', u'packag',
u'contain', u'version', u'vim', u'compil', u'standard',
u'set', u'featur', u'packag', u'doe', u'provid', u'gui',
u'version', u'vim', u'vim', u'packag', u'need']
@patch('apprecommender.ml.data.MachineLearningData.get_pkg_description')
def test_get_pkg_terms(self, mock_description):
mock_description.return_value = 'Vim is an text editor written in C'
vim_terms = [u'vim', u'text', u'editor']
vim_terms_result = self.ml_data.get_pkg_terms(self.cache, 'vim')
print vim_terms_result
for term in vim_terms:
self.assertTrue(term in vim_terms_result)
......@@ -55,7 +50,9 @@ class PkgClassificationTests(unittest.TestCase):
self.assertEqual(row_list_to_assert, row_list)
def test_get_pkg_classification(self):
@patch('apprecommender.ml.data.MachineLearningData.get_pkg_description')
def test_get_pkg_classification(self, mock_description):
mock_description.return_value = 'vim is an text editor written in c'
axi_path = "/var/lib/apt-xapian-index/index"
axi = xapian.Database(axi_path)
pkgs = {'vim': 'EX'}
......
......@@ -29,14 +29,17 @@ from apprecommender.strategy import (ContentBased, MachineLearningBVA,
class RecommendationResultTests(unittest.TestCase):
@classmethod
def setUpClass(self):
self.result = RecommendationResult({"gimp": 1.5, "inkscape": 3.0,
"eog": 1})
def test_str(self):
string = "\n 0: inkscape\n 1: gimp\n 2: eog\n"
self.assertEqual(self.result.__str__(), string)
rec = '\n1: inkscape \t vector-based drawing program\n'
rec += '2: gimp \t The GNU Image Manipulation Program\n' # noqa
rec += '3: eog \t Eye of GNOME graphics viewer program\n' # noqa
self.assertEqual(self.result.__str__(), rec)
def test_get_prediction(self):
prediction = [("inkscape", 3.0), ("gimp", 1.5), ("eog", 1)]
......@@ -44,6 +47,7 @@ class RecommendationResultTests(unittest.TestCase):
class RecommenderTests(unittest.TestCase):
@classmethod
def setUpClass(self):
cfg = Config()
......@@ -78,8 +82,6 @@ class RecommenderTests(unittest.TestCase):
self.rec.set_strategy("mlbow_eset")
self.assertIsInstance(self.rec.strategy, MachineLearningBOW)
self.assertEqual(self.rec.strategy.content, "mlbow_mix_eset")
# self.rec.set_strategy("knn")
# self.assertIsInstance(self.rec.strategy,Collaborative)
def test_get_recommendation(self):
user = User({"inkscape": 1, "gimp": 1, "eog": 1})
......
......@@ -35,9 +35,9 @@ ERROR_INIT = 1
ERROR_TRAIN = 2
def check_for_flag(options, short_flag, long_flag):
    """
    Tell whether short_flag or long_flag appears among the parsed options.

    options is an iterable of (option, value) pairs; only the option name
    of each pair is inspected. Returns True on the first match and False
    when no pair matches (including when options is empty).
    """
    return any(option in (short_flag, long_flag) for option, _ in options)
......@@ -45,10 +45,8 @@ def call_initialize(options):
def run_apprecommender(options):
try:
recommendation_size = 20
app_recommender = AppRecommender()
app_recommender.make_recommendation(recommendation_size)
app_recommender.make_recommendation()
return SUCCESS
except xapian.DatabaseOpeningError:
return ERROR_INIT
......@@ -57,24 +55,17 @@ def run_apprecommender(options):
return ERROR_TRAIN
def call_training(options):
for option, _ in options:
if option in ("-t", "--train"):
return True
return False
def run():
load_options = LoadOptions()
load_options.load()
options = load_options.options
if call_initialize(load_options.options):
if check_for_flag(options, '-i', '--init'):
print "Initializing AppRecommender"
initialize = Initialize()
initialize.prepare_data()
return SUCCESS
elif call_training(load_options.options):
elif check_for_flag(options, '-t', '--train'):
print "Training machine learning"
MachineLearning.train(MachineLearningBVA)
MachineLearning.train(MachineLearningBOW)
......
sudo apt-get install python python-xapian python-apt python-cluster python-simplejson python-numpy apt-xapian-index python-xdg debtags python-pip python-sklearn python-matplotlib python-stemmer -y
sudo apt-get install python python-xapian python-apt python-cluster python-simplejson python-numpy apt-xapian-index python-xdg debtags python-pip python-sklearn python-matplotlib python-stemmer python-fuzzywuzzy -y
sudo update-apt-xapian-index
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment