Commit b299f6ca authored by Lucas Moura's avatar Lucas Moura

Imported Upstream version 0.6.3

parent 3d30bd13
Pipeline #3839169 failed with stage
in 57 seconds
...@@ -14,3 +14,4 @@ apprecommender.egg-info/ ...@@ -14,3 +14,4 @@ apprecommender.egg-info/
apprecommender/tests/test_data/.sample_axi/ apprecommender/tests/test_data/.sample_axi/
apprecommender/tests/test_data/.sample_axi/ apprecommender/tests/test_data/.sample_axi/
apprecommender/web/submissions/ apprecommender/web/submissions/
.travis.yml.container
before_script:
- ./install_dependencies.sh
- sudo pip2 install flake8
- /usr/bin/python2 -m apprecommender.main.cli --init
- /usr/bin/python2 -m apprecommender.main.cli --train
test:
script:
- flake8 .
- /usr/bin/python2 setup.py test
AppRecommender [![Build Status](https://gitlab.com/AppRecommender/AppRecommender/badges/master/build.svg)](https://gitlab.com/AppRecommender/AppRecommender/builds)
===============================================================
Application recommender for GNU/Linux systems
---------------------------------------------------------------
Install dependencies
---------------------
$ apt-get install python python-xapian python-apt python-cluster python-webpy python-simplejson python-numpy apt-xapian-index python-xdg debtags python-pip python-sklearn python-matplotlib python-stemmer -y
$ sudo update-apt-xapian-index
$ pip install setuptools
Run AppRecommender web UI
--------------------------
$ cd ./src/web
$ ./server.py
Open a browser and access http://localhost:8080
More info at https://github.com/tassia/AppRecommender/wiki
Run AppRecommender in Terminal
------------------------------
$ cd ./bin
$ ./apprec.py -s cb
Run "$ ./apprec.py -h" to view the recommender strategies
Prepare AppRecommender data
---------------------------
Run the following commands:
$ ./install_dependencies.sh
$ cd ./bin
$ ./apprec.py --init
Run Machine Learning Training
----------------------------
$ cd ./bin
$ ./apprec.py --train
...@@ -4,13 +4,12 @@ ...@@ -4,13 +4,12 @@
# logging level # logging level
debug = 0 debug = 0
verbose = 0 verbose = 0
# output file
output = apprec.log
# output = /dev/null
[data_sources] [data_sources]
# base_dir = /var/lib/apprecommender/
base_dir = ~/.app-recommender/ base_dir = ~/.app-recommender/
user_data_dir = user_data/ user_data_dir = user_data/
output = apprec.log
# filters for valid packages # filters for valid packages
filters_dir = filters filters_dir = filters
pkgs_filter = desktopapps pkgs_filter = desktopapps
......
...@@ -45,17 +45,20 @@ class Config(Singleton): ...@@ -45,17 +45,20 @@ class Config(Singleton):
['/etc/apprecommender/recommender.conf', ['/etc/apprecommender/recommender.conf',
os.path.expanduser('~/.app_recommender.rc'), os.path.expanduser('~/.app_recommender.rc'),
os.path.expanduser('app_recommender.cfg')]) os.path.expanduser('app_recommender.cfg')])
self.home_dir = os.path.expanduser('~/.app-recommender')
except (MissingSectionHeaderError), err: except (MissingSectionHeaderError), err:
logging.error("Error in config file syntax: %s", str(err)) logging.error("Error in config file syntax: %s", str(err))
os.abort() os.abort()
if not hasattr(self, 'initialized'): if not hasattr(self, 'initialized'):
# data_source options # data_source options
# self.base_dir = '/var/lib/apprecommender'
self.base_dir = os.path.expanduser('~/.app-recommender') self.base_dir = os.path.expanduser('~/.app-recommender')
self.output = os.path.join(self.home_dir, 'apprec.log')
self.user_data_dir = os.path.join(self.base_dir, "user_data/") self.user_data_dir = os.path.join(self.base_dir, "user_data/")
# general options # general options
self.debug = 0 self.debug = 0
self.verbose = 1 self.verbose = 0
self.output = os.path.join(self.base_dir, "apprec.log")
# filters for valid packages # filters for valid packages
self.filters_dir = os.path.join(self.base_dir, "filters") self.filters_dir = os.path.join(self.base_dir, "filters")
self.pkgs_filter = os.path.join(self.filters_dir, "desktopapps") self.pkgs_filter = os.path.join(self.filters_dir, "desktopapps")
...@@ -104,7 +107,6 @@ class Config(Singleton): ...@@ -104,7 +107,6 @@ class Config(Singleton):
self.popcon_profiling = "full" self.popcon_profiling = "full"
self.load_config_file() self.load_config_file()
self.set_logger()
self.initialized = 1 self.initialized = 1
logging.info("Basic config") logging.info("Basic config")
...@@ -127,11 +129,12 @@ class Config(Singleton): ...@@ -127,11 +129,12 @@ class Config(Singleton):
self.debug = int(self.read_option('general', 'verbose')) self.debug = int(self.read_option('general', 'verbose'))
self.base_dir = os.path.expanduser( self.base_dir = os.path.expanduser(
self.read_option('data_sources', 'base_dir')) self.read_option('data_sources', 'base_dir'))
self.output = os.path.join(
self.home_dir, self.read_option('data_sources',
'output'))
self.user_data_dir = os.path.join( self.user_data_dir = os.path.join(
self.base_dir, self.read_option('data_sources', self.base_dir, self.read_option('data_sources',
'user_data_dir')) 'user_data_dir'))
self.output = os.path.join(
self.base_dir, self.read_option('general', 'output'))
self.filters_dir = os.path.join( self.filters_dir = os.path.join(
self.base_dir, self.read_option('data_sources', self.base_dir, self.read_option('data_sources',
'filters_dir')) 'filters_dir'))
...@@ -187,7 +190,7 @@ class Config(Singleton): ...@@ -187,7 +190,7 @@ class Config(Singleton):
Configure application logger and log level. Configure application logger and log level.
""" """
self.logger = logging.getLogger('') # root logger is used by default self.logger = logging.getLogger('') # root logger is used by default
self.logger.setLevel(logging.INFO) self.logger.setLevel(logging.WARNING)
if self.debug == 1: if self.debug == 1:
log_level = logging.DEBUG log_level = logging.DEBUG
...@@ -202,11 +205,13 @@ class Config(Singleton): ...@@ -202,11 +205,13 @@ class Config(Singleton):
console_handler.setLevel(log_level) console_handler.setLevel(log_level)
self.logger.addHandler(console_handler) self.logger.addHandler(console_handler)
if not os.path.exists(self.base_dir): if not os.path.exists(self.home_dir):
os.makedirs(self.base_dir) os.makedirs(self.home_dir)
file_handler = logging.handlers.RotatingFileHandler(self.output, file_handler = logging.handlers.RotatingFileHandler(self.output,
maxBytes=50000000, maxBytes=50000000,
backupCount=5) backupCount=5)
log_format = '%(asctime)s %(levelname)-8s %(message)s' log_format = '%(asctime)s %(levelname)-8s %(message)s'
file_handler.setFormatter(logging.Formatter( file_handler.setFormatter(logging.Formatter(
log_format, datefmt='%Y-%m-%d %H:%M:%S')) log_format, datefmt='%Y-%m-%d %H:%M:%S'))
......
...@@ -9,15 +9,16 @@ from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS ...@@ -9,15 +9,16 @@ from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
class PkgInitDecider(): class PkgInitDecider():
""" """
Class used to decide if a package can be considered to recommended to an Class used to decide if a package can be considered to recommended to an
user or not. user or not.
""" """
INVALID_PREFIXES = ['ruby', 'python', 'python3', 'golang', 'gir', INVALID_PREFIXES = {'ruby', 'python', 'python3', 'golang', 'gir',
'texlive'] 'texlive'}
INVALID_SUFFIXES = ['examples', 'dbg', 'data', 'dev', 'utils', 'common', INVALID_SUFFIXES = {'examples', 'dbg', 'data', 'dev', 'utils', 'common',
'fonts'] 'fonts', 'png', 'core', 'default'}
def __init__(self): def __init__(self):
self.cache = apt.Cache() self.cache = apt.Cache()
...@@ -50,6 +51,9 @@ class PkgInitDecider(): ...@@ -50,6 +51,9 @@ class PkgInitDecider():
return user_programs return user_programs
def is_section_doc(self, pkg_section):
return pkg_section == 'doc'
def is_valid_dependency(self, pkg_tags, pkg_section): def is_valid_dependency(self, pkg_tags, pkg_section):
tags_dep = 'role::program' in pkg_tags or 'devel::editor' in pkg_tags tags_dep = 'role::program' in pkg_tags or 'devel::editor' in pkg_tags
section_dep = pkg_section == 'interpreters' section_dep = pkg_section == 'interpreters'
...@@ -103,11 +107,19 @@ class PkgInitDecider(): ...@@ -103,11 +107,19 @@ class PkgInitDecider():
pkg_candidate = self.cache[pkg].candidate pkg_candidate = self.cache[pkg].candidate
valid = (pkg_candidate and if not pkg_candidate:
self.is_program_dependencies_installed(pkg_candidate) and return False
not self.is_pkg_a_prefix_or_suffix(pkg))
if not self.is_program_dependencies_installed(pkg_candidate):
return False
if self.is_pkg_a_prefix_or_suffix(pkg):
return False
if self.is_section_doc(pkg_candidate.section):
return False
return valid return True
class PkgMatchDecider(xapian.MatchDecider): class PkgMatchDecider(xapian.MatchDecider):
......
...@@ -102,10 +102,15 @@ class Initialize: ...@@ -102,10 +102,15 @@ class Initialize:
print "Time elapsed: %d seconds." % delta.seconds print "Time elapsed: %d seconds." % delta.seconds
def prepare_data(self): def prepare_data(self):
if os.path.exists(self.config.base_dir):
shutil.rmtree(self.config.base_dir) try:
os.makedirs(self.config.base_dir) if os.path.exists(self.config.base_dir):
os.makedirs(self.config.filters_dir) shutil.rmtree(self.config.base_dir)
os.makedirs(self.config.base_dir)
os.makedirs(self.config.filters_dir)
except OSError:
raise
tags = self.get_tags() tags = self.get_tags()
tags_path = "{}/debtags".format(self.config.filters_dir) tags_path = "{}/debtags".format(self.config.filters_dir)
......
...@@ -54,6 +54,8 @@ class LoadOptions(Singleton): ...@@ -54,6 +54,8 @@ class LoadOptions(Singleton):
else: else:
assert False, "unhandled option" assert False, "unhandled option"
config.set_logger()
def usage(self): def usage(self):
""" """
Print usage help. Print usage help.
......
...@@ -16,8 +16,7 @@ class AppRecommender: ...@@ -16,8 +16,7 @@ class AppRecommender:
def make_recommendation(self, print_recommendation=True): def make_recommendation(self, print_recommendation=True):
begin_time = datetime.datetime.now() begin_time = datetime.datetime.now()
logging.info("Computation started at %s" % begin_time) logging.info("Computation started at %s" % begin_time)
# user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
# "desktopapps"))
user = LocalSystem() user = LocalSystem()
recommendation_size = Config().num_recommendations recommendation_size = Config().num_recommendations
user_recommendation = (self.recommender.get_recommendation( user_recommendation = (self.recommender.get_recommendation(
......
#!/usr/bin/env python
import xapian
from apprecommender.main.app_recommender import AppRecommender
from apprecommender.config import Config
from apprecommender.initialize import Initialize
from apprecommender.load_options import LoadOptions
from apprecommender.strategy import (MachineLearning, MachineLearningBVA,
MachineLearningBOW)
from apprecommender.main import collect_user_data
# Result codes handed back by run() / run_apprecommender() and inspected by
# main() to decide which hint to print:
#   SUCCESS           - the requested action completed.
#   ERROR_INIT        - the xapian database could not be opened.
#   ERROR_TRAIN       - IOError while a machine learning strategy is selected.
#   PERMISSION_DENIED - OSError/IOError while preparing or training data.
SUCCESS = 0
ERROR_INIT = 1
ERROR_TRAIN = 2
PERMISSION_DENIED = 3
def check_for_flag(options, short_flag, long_flag):
    """
    Tell whether a command line flag was supplied.

    `options` is an iterable of (option, value) pairs. The result is True
    when at least one option equals `short_flag` or `long_flag`, and False
    otherwise (including when `options` is empty).
    """
    accepted = (short_flag, long_flag)
    return any(name in accepted for name, _ in options)
def run_apprecommender(options):
    """
    Build an AppRecommender and make a recommendation.

    `options` is accepted for interface compatibility; it is not read here.

    Returns SUCCESS when the recommendation is made, ERROR_INIT when the
    xapian database cannot be opened, ERROR_TRAIN when an IOError occurs
    while the configured strategy contains "ml", and PERMISSION_DENIED on
    OSError. An IOError raised under any other strategy is propagated to
    the caller instead of being silently discarded.
    """
    try:
        app_recommender = AppRecommender()
        app_recommender.make_recommendation()
        return SUCCESS
    except xapian.DatabaseOpeningError:
        return ERROR_INIT
    except IOError:
        if "ml" in Config().strategy:
            return ERROR_TRAIN
        # Not a missing-training-data problem: do not hide the failure
        # behind an implicit None return.
        raise
    except OSError:
        return PERMISSION_DENIED
def run():
load_options = LoadOptions()
load_options.load()
options = load_options.options
if check_for_flag(options, '-i', '--init'):
print "Initializing AppRecommender"
initialize = Initialize()
try:
initialize.prepare_data()
except OSError:
return PERMISSION_DENIED
return SUCCESS
elif check_for_flag(options, '-t', '--train'):
print "Training machine learning"
try:
MachineLearning.train(MachineLearningBVA)
MachineLearning.train(MachineLearningBOW)
except IOError:
return PERMISSION_DENIED
return SUCCESS
elif check_for_flag(options, '-c', '--contribute'):
collect_user_data.main()
else:
return run_apprecommender(load_options.options)
def main():
result = run()
if result is ERROR_INIT:
print "\n"
print "Please, Initialize AppRecommender"
print "Run: apprec.py --init"
elif result is ERROR_TRAIN:
print "\n"
print "Please, run Machine Learning Training"
print "Run: apprec.py --train"
elif result is PERMISSION_DENIED:
print "Please, run this command as sudo"
if __name__ == '__main__':
main()
...@@ -12,16 +12,14 @@ import tarfile ...@@ -12,16 +12,14 @@ import tarfile
import time import time
import xapian import xapian
from apprecommender.app_recommender import AppRecommender from apprecommender.main.app_recommender import AppRecommender
from apprecommender.config import Config from apprecommender.config import Config
from apprecommender.data import get_user_installed_pkgs from apprecommender.data import get_user_installed_pkgs
from apprecommender.data_classification import get_alternative_pkg from apprecommender.data_classification import get_alternative_pkg
from apprecommender.ml.data import MachineLearningData from apprecommender.ml.data import MachineLearningData
from apprecommender.ml.pkg_time import PkgTime from apprecommender.ml.pkg_time import PkgTime
from apprecommender.strategy import (MachineLearning, MachineLearningBVA,
MachineLearningBOW)
from apprecommender.utils import print_progress_bar from apprecommender.utils import print_progress_bar
from bin.ml_cross_validation import ml_cross_validation from apprecommender.main.ml_cross_validation import ml_cross_validation
LOG_PATH = os.path.expanduser('~/app_recommender_log') LOG_PATH = os.path.expanduser('~/app_recommender_log')
SUFIX = dt.datetime.now().strftime('%Y%m%d%H%M') SUFIX = dt.datetime.now().strftime('%Y%m%d%H%M')
...@@ -369,8 +367,6 @@ def clear_prints(): ...@@ -369,8 +367,6 @@ def clear_prints():
def train_machine_learning(): def train_machine_learning():
try: try:
print "Training machine learning" print "Training machine learning"
MachineLearning.train(MachineLearningBVA)
MachineLearning.train(MachineLearningBOW)
os.system("cp {} {}".format( os.system("cp {} {}".format(
MachineLearningData.PKGS_CLASSIFICATIONS, LOG_PATH)) MachineLearningData.PKGS_CLASSIFICATIONS, LOG_PATH))
...@@ -378,8 +374,11 @@ def train_machine_learning(): ...@@ -378,8 +374,11 @@ def train_machine_learning():
print "\n\nPlease check if you prepared the AppRecommender data" print "\n\nPlease check if you prepared the AppRecommender data"
print "Try to run the following commands:" print "Try to run the following commands:"
print " $ cd .." print " $ cd .."
print " $ apprec --init\n" print " $ sudo apprec --init\n"
exit(1) exit(1)
except IOError:
print "\n\nPlease run the train command before executing this script:"
print " $ sudo apprec --train\n"
def run_cross_validation(): def run_cross_validation():
......
...@@ -7,8 +7,6 @@ import pickle ...@@ -7,8 +7,6 @@ import pickle
import sys import sys
import getopt import getopt
sys.path.insert(0, "{0}/../".format(os.path.dirname(__file__)))
from apprecommender.ml.cross_validation import (CrossValidationBVA, from apprecommender.ml.cross_validation import (CrossValidationBVA,
CrossValidationBOW) CrossValidationBOW)
from apprecommender.evaluation import (SimpleAccuracy, Precision, Recall, FPR, from apprecommender.evaluation import (SimpleAccuracy, Precision, Recall, FPR,
...@@ -37,10 +35,11 @@ def get_strategy(ml_strategy_str, pkg_data, partition_size, rounds, ...@@ -37,10 +35,11 @@ def get_strategy(ml_strategy_str, pkg_data, partition_size, rounds,
def get_pkg_data(ml_strategy_str, ml_data, labels): def get_pkg_data(ml_strategy_str, ml_data, labels):
if ml_strategy_str == 'bow': if ml_strategy_str == 'bow':
path = BagOfWords.BAG_OF_WORDS_PKGS_CLASSIFICATION path = BagOfWords.BAG_OF_WORDS_PKGS_CLASSIFICATION
with open(path, 'ra') as pkgs_classification:
return pickle.load(pkgs_classification)
else: else:
return ml_data.create_data(labels) path = MachineLearningData.PKGS_CLASSIFICATIONS
with open(path, 'ra') as pkgs_classification:
return pickle.load(pkgs_classification)
def ml_cross_validation(folder_path, ml_strategy_str): def ml_cross_validation(folder_path, ml_strategy_str):
......
...@@ -13,8 +13,7 @@ from apprecommender.decider import FilterTag, FilterDescription ...@@ -13,8 +13,7 @@ from apprecommender.decider import FilterTag, FilterDescription
class MachineLearningData(): class MachineLearningData():
XAPIAN_DATABASE_PATH = path.expanduser( XAPIAN_DATABASE_PATH = Config().axi_desktopapps
'~/.app-recommender/axi_desktopapps/')
USER_DATA_DIR = Config().user_data_dir USER_DATA_DIR = Config().user_data_dir
BASE_DIR = Config().base_dir BASE_DIR = Config().base_dir
...@@ -56,13 +55,15 @@ class MachineLearningData(): ...@@ -56,13 +55,15 @@ class MachineLearningData():
self.get_pkgs_table_classification(self.axi, pkgs, self.get_pkgs_table_classification(self.axi, pkgs,
cache, debtags_name, cache, debtags_name,
terms_name)) terms_name))
try:
self.save_pkg_data(terms_name, self.save_pkg_data(
MachineLearningData.MACHINE_LEARNING_TERMS) terms_name, MachineLearningData.MACHINE_LEARNING_TERMS)
self.save_pkg_data(debtags_name, self.save_pkg_data(
MachineLearningData.MACHINE_LEARNING_DEBTAGS) debtags_name, MachineLearningData.MACHINE_LEARNING_DEBTAGS)
self.save_pkg_data(pkgs_classifications, self.save_pkg_data(
MachineLearningData.PKGS_CLASSIFICATIONS) pkgs_classifications, MachineLearningData.PKGS_CLASSIFICATIONS)
except IOError:
raise
return pkgs_classifications return pkgs_classifications
...@@ -80,17 +81,22 @@ class MachineLearningData(): ...@@ -80,17 +81,22 @@ class MachineLearningData():
pkgs = sorted(pkgs, key=lambda pkg: pkgs_percent[pkg]) pkgs = sorted(pkgs, key=lambda pkg: pkgs_percent[pkg])
pkgs = list(reversed(pkgs)) pkgs = list(reversed(pkgs))
size = len(pkgs) / len(labels) if len(pkgs) > len(labels):
for index, label in enumerate(labels): size = len(pkgs) / len(labels)
index_begin = size * index for index, label in enumerate(labels):
index_end = index_begin + size index_begin = size * index
classifications = dict.fromkeys(pkgs[index_begin:index_end], label) index_end = index_begin + size
pkgs_classification.update(classifications) classifications = dict.fromkeys(pkgs[index_begin:index_end],
label)
index_begin = size * len(labels) pkgs_classification.update(classifications)
if index_begin < len(labels):
classifications = dict.fromkeys(pkgs[index_begin], label[-1]) index_begin = size * len(labels)
pkgs_classification.update(classifications) if index_begin < len(labels):
classifications = dict.fromkeys(pkgs[index_begin], label[-1])
pkgs_classification.update(classifications)
else:
for index, pkg in enumerate(pkgs):
pkgs_classification[pkg] = labels[index]
return pkgs_classification return pkgs_classification
...@@ -204,5 +210,11 @@ class MachineLearningData(): ...@@ -204,5 +210,11 @@ class MachineLearningData():
return pkgs_classification return pkgs_classification
def save_pkg_data(self, pkg_data, file_path): def save_pkg_data(self, pkg_data, file_path):
with open(file_path, 'wb') as text: try:
pickle.dump(pkg_data, text) ml_data = open(file_path, 'wb')
with ml_data:
pickle.dump(pkg_data, ml_data)
except IOError:
raise
...@@ -34,7 +34,6 @@ class PkgTime: ...@@ -34,7 +34,6 @@ class PkgTime:
bestatime, bestmtime = 0, 0 bestatime, bestmtime = 0, 0
for pkg_file in pkg_files.splitlines(): for pkg_file in pkg_files.splitlines():
if invalid_path_regex.search(pkg_file): if invalid_path_regex.search(pkg_file):
continue continue
......
...@@ -31,7 +31,6 @@ import xapian ...@@ -31,7 +31,6 @@ import xapian
import numpy as np import numpy as np
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
from os import path
from apprecommender.config import Config from apprecommender.config import Config
from apprecommender.decider import PkgMatchDecider from apprecommender.decider import PkgMatchDecider
...@@ -39,7 +38,7 @@ from apprecommender.ml.bag_of_words import BagOfWords ...@@ -39,7 +38,7 @@ from apprecommender.ml.bag_of_words import BagOfWords
from apprecommender.ml.bayes_matrix import BayesMatrix from apprecommender.ml.bayes_matrix import BayesMatrix
from apprecommender.ml.data import MachineLearningData from apprecommender.ml.data import MachineLearningData
XAPIAN_DATABASE_PATH = path.expanduser('~/.app-recommender/axi_desktopapps/') XAPIAN_DATABASE_PATH = Config().axi_desktopapps
USER_DATA_DIR = Config().user_data_dir USER_DATA_DIR = Config().user_data_dir
PKGS_CLASSIFICATIONS_INDICES = (USER_DATA_DIR + PKGS_CLASSIFICATIONS_INDICES = (USER_DATA_DIR +
'pkgs_classifications_indices.txt') 'pkgs_classifications_indices.txt')
...@@ -223,12 +222,15 @@ class MachineLearning(ContentBased): ...@@ -223,12 +222,15 @@ class MachineLearning(ContentBased):
@staticmethod @staticmethod
def train(cls): def train(cls):
if MachineLearning.PKGS_CLASSIFICATIONS is None: ml_data = MachineLearningData()
ml_data = MachineLearningData() labels = ['RU', 'U', 'NU']
labels = ['RU', 'U', 'NU']
MachineLearning.PKGS_CLASSIFICATIONS = ml_data.create_data(labels)
cls.run_train(MachineLearning.PKGS_CLASSIFICATIONS) try:
MachineLearning.PKGS_CLASSIFICATIONS = ml_data.create_data(
labels)
cls.run_train(MachineLearning.PKGS_CLASSIFICATIONS)
except IOError:
raise
@abstractmethod @abstractmethod
def get_debtags_path(self): def get_debtags_path(self):
......
...@@ -33,14 +33,17 @@ class AxiSearchTests(unittest.TestCase): ...@@ -33,14 +33,17 @@ class AxiSearchTests(unittest.TestCase):
self.axi = xapian.Database(cfg.axi) self.axi = xapian.Database(cfg.axi)
def test_search_pkg_tags(self): def test_search_pkg_tags(self):
tags = axi_search_pkg_tags(self.axi, 'apticron') tags = axi_search_pkg_tags(self.axi, 'vim')
print tags self.assertEqual(set(tags), set(['XTscope::application',
self.assertEqual(set(tags), set(['XTadmin::package-management',
'XTnetwork::server',
'XTimplemented-in::shell',
'XTrole::program', 'XTrole::program',
'XTsuite::debian', 'XTuse::monitor', 'XTimplemented-in::c',
'XTworks-with::mail'])) 'XTworks-with::unicode',
'XTworks-with::text',
'XTuse::editing',