Commit b299f6ca authored by Lucas Moura's avatar Lucas Moura

Imported Upstream version 0.6.3

parent 3d30bd13
Pipeline #3839169 failed with stage
in 57 seconds
......@@ -14,3 +14,4 @@ apprecommender.egg-info/
apprecommender/tests/test_data/.sample_axi/
apprecommender/tests/test_data/.sample_axi/
apprecommender/web/submissions/
.travis.yml.container
before_script:
- ./install_dependencies.sh
- sudo pip2 install flake8
- /usr/bin/python2 -m apprecommender.main.cli --init
- /usr/bin/python2 -m apprecommender.main.cli --train
test:
script:
- flake8 .
- /usr/bin/python2 setup.py test
AppRecommender [![Build Status](https://gitlab.com/AppRecommender/AppRecommender/badges/master/build.svg)](https://gitlab.com/AppRecommender/AppRecommender/builds)
===============================================================
Application recommender for GNU/Linux systems
---------------------------------------------------------------
Install dependencies
---------------------
$ apt-get install python python-xapian python-apt python-cluster python-webpy python-simplejson python-numpy apt-xapian-index python-xdg debtags python-pip python-sklearn python-matplotlib python-stemmer -y
$ sudo update-apt-xapian-index
$ pip install setuptools
Run AppRecommender web UI
--------------------------
$ cd ./src/web
$ ./server.py
Open a browser and access http://localhost:8080
More info at https://github.com/tassia/AppRecommender/wiki
Run AppRecommender in Terminal
------------------------------
$ cd ./bin
$ ./apprec.py -s cb
Run "./apprec.py -h" to list the available recommender strategies
Prepare AppRecommender data
---------------------------
Run the following commands:
$ ./install_dependencies.sh
$ cd ./bin
$ ./apprec.py --init
Run Machine Learning Training
----------------------------
$ cd ./bin
$ ./apprec.py --train
......@@ -4,13 +4,12 @@
# logging level
debug = 0
verbose = 0
# output file
output = apprec.log
# output = /dev/null
[data_sources]
# base_dir = /var/lib/apprecommender/
base_dir = ~/.app-recommender/
user_data_dir = user_data/
output = apprec.log
# filters for valid packages
filters_dir = filters
pkgs_filter = desktopapps
......
......@@ -45,17 +45,20 @@ class Config(Singleton):
['/etc/apprecommender/recommender.conf',
os.path.expanduser('~/.app_recommender.rc'),
os.path.expanduser('app_recommender.cfg')])
self.home_dir = os.path.expanduser('~/.app-recommender')
except (MissingSectionHeaderError), err:
logging.error("Error in config file syntax: %s", str(err))
os.abort()
if not hasattr(self, 'initialized'):
# data_source options
# self.base_dir = '/var/lib/apprecommender'
self.base_dir = os.path.expanduser('~/.app-recommender')
self.output = os.path.join(self.home_dir, 'apprec.log')
self.user_data_dir = os.path.join(self.base_dir, "user_data/")
# general options
self.debug = 0
self.verbose = 1
self.output = os.path.join(self.base_dir, "apprec.log")
self.verbose = 0
# filters for valid packages
self.filters_dir = os.path.join(self.base_dir, "filters")
self.pkgs_filter = os.path.join(self.filters_dir, "desktopapps")
......@@ -104,7 +107,6 @@ class Config(Singleton):
self.popcon_profiling = "full"
self.load_config_file()
self.set_logger()
self.initialized = 1
logging.info("Basic config")
......@@ -127,11 +129,12 @@ class Config(Singleton):
self.debug = int(self.read_option('general', 'verbose'))
self.base_dir = os.path.expanduser(
self.read_option('data_sources', 'base_dir'))
self.output = os.path.join(
self.home_dir, self.read_option('data_sources',
'output'))
self.user_data_dir = os.path.join(
self.base_dir, self.read_option('data_sources',
'user_data_dir'))
self.output = os.path.join(
self.base_dir, self.read_option('general', 'output'))
self.filters_dir = os.path.join(
self.base_dir, self.read_option('data_sources',
'filters_dir'))
......@@ -187,7 +190,7 @@ class Config(Singleton):
Configure application logger and log level.
"""
self.logger = logging.getLogger('') # root logger is used by default
self.logger.setLevel(logging.INFO)
self.logger.setLevel(logging.WARNING)
if self.debug == 1:
log_level = logging.DEBUG
......@@ -202,11 +205,13 @@ class Config(Singleton):
console_handler.setLevel(log_level)
self.logger.addHandler(console_handler)
if not os.path.exists(self.base_dir):
os.makedirs(self.base_dir)
if not os.path.exists(self.home_dir):
os.makedirs(self.home_dir)
file_handler = logging.handlers.RotatingFileHandler(self.output,
maxBytes=50000000,
backupCount=5)
log_format = '%(asctime)s %(levelname)-8s %(message)s'
file_handler.setFormatter(logging.Formatter(
log_format, datefmt='%Y-%m-%d %H:%M:%S'))
......
......@@ -9,15 +9,16 @@ from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
class PkgInitDecider():
"""
Class used to decide whether a package can be recommended to a
user or not.
"""
INVALID_PREFIXES = ['ruby', 'python', 'python3', 'golang', 'gir',
'texlive']
INVALID_SUFFIXES = ['examples', 'dbg', 'data', 'dev', 'utils', 'common',
'fonts']
INVALID_PREFIXES = {'ruby', 'python', 'python3', 'golang', 'gir',
'texlive'}
INVALID_SUFFIXES = {'examples', 'dbg', 'data', 'dev', 'utils', 'common',
'fonts', 'png', 'core', 'default'}
def __init__(self):
self.cache = apt.Cache()
......@@ -50,6 +51,9 @@ class PkgInitDecider():
return user_programs
def is_section_doc(self, pkg_section):
    """
    Return True when `pkg_section` is exactly the string 'doc',
    False otherwise.
    """
    return pkg_section == 'doc'
def is_valid_dependency(self, pkg_tags, pkg_section):
tags_dep = 'role::program' in pkg_tags or 'devel::editor' in pkg_tags
section_dep = pkg_section == 'interpreters'
......@@ -103,11 +107,19 @@ class PkgInitDecider():
pkg_candidate = self.cache[pkg].candidate
valid = (pkg_candidate and
self.is_program_dependencies_installed(pkg_candidate) and
not self.is_pkg_a_prefix_or_suffix(pkg))
if not pkg_candidate:
return False
if not self.is_program_dependencies_installed(pkg_candidate):
return False
if self.is_pkg_a_prefix_or_suffix(pkg):
return False
if self.is_section_doc(pkg_candidate.section):
return False
return valid
return True
class PkgMatchDecider(xapian.MatchDecider):
......
......@@ -102,10 +102,15 @@ class Initialize:
print "Time elapsed: %d seconds." % delta.seconds
def prepare_data(self):
if os.path.exists(self.config.base_dir):
shutil.rmtree(self.config.base_dir)
os.makedirs(self.config.base_dir)
os.makedirs(self.config.filters_dir)
try:
if os.path.exists(self.config.base_dir):
shutil.rmtree(self.config.base_dir)
os.makedirs(self.config.base_dir)
os.makedirs(self.config.filters_dir)
except OSError:
raise
tags = self.get_tags()
tags_path = "{}/debtags".format(self.config.filters_dir)
......
......@@ -54,6 +54,8 @@ class LoadOptions(Singleton):
else:
assert False, "unhandled option"
config.set_logger()
def usage(self):
"""
Print usage help.
......
......@@ -16,8 +16,7 @@ class AppRecommender:
def make_recommendation(self, print_recommendation=True):
begin_time = datetime.datetime.now()
logging.info("Computation started at %s" % begin_time)
# user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
# "desktopapps"))
user = LocalSystem()
recommendation_size = Config().num_recommendations
user_recommendation = (self.recommender.get_recommendation(
......
#!/usr/bin/env python
import xapian
from apprecommender.main.app_recommender import AppRecommender
from apprecommender.config import Config
from apprecommender.initialize import Initialize
from apprecommender.load_options import LoadOptions
from apprecommender.strategy import (MachineLearning, MachineLearningBVA,
MachineLearningBOW)
from apprecommender.main import collect_user_data
# Status codes returned by run() / run_apprecommender() and interpreted
# by main().
# The requested action finished without a handled error.
SUCCESS = 0
# The xapian database could not be opened; main() asks for "--init".
ERROR_INIT = 1
# IOError while an "ml" strategy is configured; main() asks for "--train".
ERROR_TRAIN = 2
# OSError (or IOError while training); main() asks to run as sudo.
PERMISSION_DENIED = 3
def check_for_flag(options, short_flag, long_flag):
    """
    Tell whether a command line flag was given.

    `options` is iterated as (flag, value) pairs and only the flag part is
    compared. Returns True as soon as a flag equals `short_flag` or
    `long_flag`, and False when none does (including for empty `options`).
    """
    wanted = (short_flag, long_flag)
    return any(flag in wanted for flag, _value in options)
def run_apprecommender(options):
    """
    Make a recommendation and translate known failures into status codes.

    `options` is accepted for interface compatibility; it is not used here.

    Returns SUCCESS after the recommendation is made, ERROR_INIT when
    xapian raises DatabaseOpeningError, ERROR_TRAIN when an IOError occurs
    while the configured strategy contains "ml", and PERMISSION_DENIED on
    OSError.

    Raises IOError when it occurs under a strategy that does not contain
    "ml": that case has no status code, and returning None implicitly
    would make the program exit without any message.
    """
    try:
        app_recommender = AppRecommender()
        app_recommender.make_recommendation()
        return SUCCESS
    except xapian.DatabaseOpeningError:
        return ERROR_INIT
    except IOError:
        if "ml" in Config().strategy:
            return ERROR_TRAIN
        # No status code covers this case; surface the error rather than
        # falling through with an implicit None.
        raise
    except OSError:
        return PERMISSION_DENIED
def run():
    """
    Load the command line options and execute the requested action.

    -i/--init prepares the data, -t/--train trains both machine learning
    strategies, -c/--contribute runs the user data collection, and without
    any of those flags a recommendation is made.

    Returns SUCCESS or PERMISSION_DENIED for the init and train actions,
    SUCCESS for the contribute action, and otherwise whatever
    run_apprecommender() returns.
    """
    load_options = LoadOptions()
    load_options.load()
    options = load_options.options

    # A parenthesised single-argument print behaves the same as a Python 2
    # statement and as a Python 3 call.
    if check_for_flag(options, '-i', '--init'):
        print("Initializing AppRecommender")
        initialize = Initialize()
        try:
            initialize.prepare_data()
        except OSError:
            return PERMISSION_DENIED
        return SUCCESS

    if check_for_flag(options, '-t', '--train'):
        print("Training machine learning")
        try:
            MachineLearning.train(MachineLearningBVA)
            MachineLearning.train(MachineLearningBOW)
        except IOError:
            return PERMISSION_DENIED
        return SUCCESS

    if check_for_flag(options, '-c', '--contribute'):
        collect_user_data.main()
        # Return a status code like every other branch instead of an
        # implicit None.
        return SUCCESS

    return run_apprecommender(options)
def main():
    """
    Command line entry point.

    Calls run() and prints a hint for the ERROR_INIT, ERROR_TRAIN and
    PERMISSION_DENIED status codes; any other result prints nothing.
    """
    result = run()

    # Compare status codes by value. Identity (`is`) on integers only
    # works when the interpreter happens to share the int objects.
    if result == ERROR_INIT:
        print("\n")
        print("Please, Initialize AppRecommender")
        print("Run: apprec.py --init")
    elif result == ERROR_TRAIN:
        print("\n")
        print("Please, run Machine Learning Training")
        print("Run: apprec.py --train")
    elif result == PERMISSION_DENIED:
        print("Please, run this command as sudo")
# Run the command line interface only when this module is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
......@@ -12,16 +12,14 @@ import tarfile
import time
import xapian
from apprecommender.app_recommender import AppRecommender
from apprecommender.main.app_recommender import AppRecommender
from apprecommender.config import Config
from apprecommender.data import get_user_installed_pkgs
from apprecommender.data_classification import get_alternative_pkg
from apprecommender.ml.data import MachineLearningData
from apprecommender.ml.pkg_time import PkgTime
from apprecommender.strategy import (MachineLearning, MachineLearningBVA,
MachineLearningBOW)
from apprecommender.utils import print_progress_bar
from bin.ml_cross_validation import ml_cross_validation
from apprecommender.main.ml_cross_validation import ml_cross_validation
LOG_PATH = os.path.expanduser('~/app_recommender_log')
SUFIX = dt.datetime.now().strftime('%Y%m%d%H%M')
......@@ -369,8 +367,6 @@ def clear_prints():
def train_machine_learning():
try:
print "Training machine learning"
MachineLearning.train(MachineLearningBVA)
MachineLearning.train(MachineLearningBOW)
os.system("cp {} {}".format(
MachineLearningData.PKGS_CLASSIFICATIONS, LOG_PATH))
......@@ -378,8 +374,11 @@ def train_machine_learning():
print "\n\nPlease check if you prepared the AppRecommender data"
print "Try to run the following commands:"
print " $ cd .."
print " $ apprec --init\n"
print " $ sudo apprec --init\n"
exit(1)
except IOError:
print "\n\nPlease run the train command before executing this script:"
print " $ sudo apprec --train\n"
def run_cross_validation():
......
......@@ -7,8 +7,6 @@ import pickle
import sys
import getopt
sys.path.insert(0, "{0}/../".format(os.path.dirname(__file__)))
from apprecommender.ml.cross_validation import (CrossValidationBVA,
CrossValidationBOW)
from apprecommender.evaluation import (SimpleAccuracy, Precision, Recall, FPR,
......@@ -37,10 +35,11 @@ def get_strategy(ml_strategy_str, pkg_data, partition_size, rounds,
def get_pkg_data(ml_strategy_str, ml_data, labels):
if ml_strategy_str == 'bow':
path = BagOfWords.BAG_OF_WORDS_PKGS_CLASSIFICATION
with open(path, 'ra') as pkgs_classification:
return pickle.load(pkgs_classification)
else:
return ml_data.create_data(labels)
path = MachineLearningData.PKGS_CLASSIFICATIONS
with open(path, 'ra') as pkgs_classification:
return pickle.load(pkgs_classification)
def ml_cross_validation(folder_path, ml_strategy_str):
......
......@@ -13,8 +13,7 @@ from apprecommender.decider import FilterTag, FilterDescription
class MachineLearningData():
XAPIAN_DATABASE_PATH = path.expanduser(
'~/.app-recommender/axi_desktopapps/')
XAPIAN_DATABASE_PATH = Config().axi_desktopapps
USER_DATA_DIR = Config().user_data_dir
BASE_DIR = Config().base_dir
......@@ -56,13 +55,15 @@ class MachineLearningData():
self.get_pkgs_table_classification(self.axi, pkgs,
cache, debtags_name,
terms_name))
self.save_pkg_data(terms_name,
MachineLearningData.MACHINE_LEARNING_TERMS)
self.save_pkg_data(debtags_name,
MachineLearningData.MACHINE_LEARNING_DEBTAGS)
self.save_pkg_data(pkgs_classifications,
MachineLearningData.PKGS_CLASSIFICATIONS)
try:
self.save_pkg_data(
terms_name, MachineLearningData.MACHINE_LEARNING_TERMS)
self.save_pkg_data(
debtags_name, MachineLearningData.MACHINE_LEARNING_DEBTAGS)
self.save_pkg_data(
pkgs_classifications, MachineLearningData.PKGS_CLASSIFICATIONS)
except IOError:
raise
return pkgs_classifications
......@@ -80,17 +81,22 @@ class MachineLearningData():
pkgs = sorted(pkgs, key=lambda pkg: pkgs_percent[pkg])
pkgs = list(reversed(pkgs))
size = len(pkgs) / len(labels)
for index, label in enumerate(labels):
index_begin = size * index
index_end = index_begin + size
classifications = dict.fromkeys(pkgs[index_begin:index_end], label)
pkgs_classification.update(classifications)
index_begin = size * len(labels)
if index_begin < len(labels):
classifications = dict.fromkeys(pkgs[index_begin], label[-1])
pkgs_classification.update(classifications)
if len(pkgs) > len(labels):
size = len(pkgs) / len(labels)
for index, label in enumerate(labels):
index_begin = size * index
index_end = index_begin + size
classifications = dict.fromkeys(pkgs[index_begin:index_end],
label)
pkgs_classification.update(classifications)
index_begin = size * len(labels)
if index_begin < len(labels):
classifications = dict.fromkeys(pkgs[index_begin], label[-1])
pkgs_classification.update(classifications)
else:
for index, pkg in enumerate(pkgs):
pkgs_classification[pkg] = labels[index]
return pkgs_classification
......@@ -204,5 +210,11 @@ class MachineLearningData():
return pkgs_classification
def save_pkg_data(self, pkg_data, file_path):
    """
    Pickle `pkg_data` into `file_path`, overwriting any existing file.

    The file is opened in binary mode and closed when the write finishes.
    An IOError raised while opening or writing the file propagates to the
    caller unchanged, so no try/except wrapper is needed here.
    """
    with open(file_path, 'wb') as pkg_file:
        pickle.dump(pkg_data, pkg_file)
......@@ -34,7 +34,6 @@ class PkgTime:
bestatime, bestmtime = 0, 0
for pkg_file in pkg_files.splitlines():
if invalid_path_regex.search(pkg_file):
continue
......
......@@ -31,7 +31,6 @@ import xapian
import numpy as np
from abc import ABCMeta, abstractmethod
from os import path
from apprecommender.config import Config
from apprecommender.decider import PkgMatchDecider
......@@ -39,7 +38,7 @@ from apprecommender.ml.bag_of_words import BagOfWords
from apprecommender.ml.bayes_matrix import BayesMatrix
from apprecommender.ml.data import MachineLearningData
XAPIAN_DATABASE_PATH = path.expanduser('~/.app-recommender/axi_desktopapps/')
XAPIAN_DATABASE_PATH = Config().axi_desktopapps
USER_DATA_DIR = Config().user_data_dir
PKGS_CLASSIFICATIONS_INDICES = (USER_DATA_DIR +
'pkgs_classifications_indices.txt')
......@@ -223,12 +222,15 @@ class MachineLearning(ContentBased):
@staticmethod
def train(cls):
if MachineLearning.PKGS_CLASSIFICATIONS is None:
ml_data = MachineLearningData()
labels = ['RU', 'U', 'NU']
MachineLearning.PKGS_CLASSIFICATIONS = ml_data.create_data(labels)
ml_data = MachineLearningData()
labels = ['RU', 'U', 'NU']
cls.run_train(MachineLearning.PKGS_CLASSIFICATIONS)
try:
MachineLearning.PKGS_CLASSIFICATIONS = ml_data.create_data(
labels)
cls.run_train(MachineLearning.PKGS_CLASSIFICATIONS)
except IOError:
raise
@abstractmethod
def get_debtags_path(self):
......
......@@ -33,14 +33,17 @@ class AxiSearchTests(unittest.TestCase):
self.axi = xapian.Database(cfg.axi)
def test_search_pkg_tags(self):
tags = axi_search_pkg_tags(self.axi, 'apticron')
print tags
self.assertEqual(set(tags), set(['XTadmin::package-management',
'XTnetwork::server',
'XTimplemented-in::shell',
tags = axi_search_pkg_tags(self.axi, 'vim')
self.assertEqual(set(tags), set(['XTscope::application',
'XTrole::program',
'XTsuite::debian', 'XTuse::monitor',
'XTworks-with::mail']))
'XTimplemented-in::c',
'XTworks-with::unicode',
'XTworks-with::text',
'XTuse::editing',
'XTinterface::commandline',
'XTdevel::editor',
'XTuitoolkit::ncurses',
'XTinterface::text-mode']))
class PopconSubmissionTests(unittest.TestCase):
......
......@@ -15,49 +15,49 @@ class PkgInitDeciderTests(unittest.TestCase):
pkg = 'python3-test'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_ruby_pkg_regex(self):
pkg = 'ruby-test'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_texlive_pkg_regex(self):
pkg = 'texlive-test'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_gir_regex(self):
pkg = 'gir1.2-test'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_golang_regex(self):
pkg = 'golang-test'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_data_regex(self):
pkg = 'test-data'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_dev_regex(self):
pkg = 'test-dev'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_utils_regex(self):
......@@ -67,19 +67,40 @@ class PkgInitDeciderTests(unittest.TestCase):
pkg = 'test-utils-1.9'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_common_regex(self):
pkg = 'test-common'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_fonts_regex(self):
pkg = 'test-fonts'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-apkg'
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_png_regex(self):
pkg = 'test-png'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_core_regex(self):
pkg = 'test-core'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
def test_pkg_default_regex(self):
pkg = 'test-default'
self.assertTrue(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
pkg = 'test-pkg'
self.assertFalse(self.pkg_init_decider.is_pkg_a_prefix_or_suffix(pkg))
......@@ -11,13 +11,6 @@ class PkgTimeTests(unittest.TestCase):
def setUp(self):
self.pkg_time = PkgTime()
def test_access_time_greater_than_modify_time(self):
pkgs_time = self.pkg_time.create_pkg_data()
for _, times in pkgs_time.iteritems():
modify, access = times
self.assertTrue(access >= modify)
@patch('commands.getoutput')
@patch('apprecommender.ml.pkg_time.get_time_from_package')
def test_invalid_paths_get_best_time(self, mock_time, mock_command):
......
......@@ -2,8 +2,8 @@
import unittest
import logging
import bin.apprec as apprec
import apprecommender.main.cli as apprec
from apprecommender.config import Config
from apprecommender.ml.data import MachineLearningData
......
......@@ -37,6 +37,7 @@ from apprecommender.error import Error
from apprecommender.singleton import Singleton
from apprecommender.decider import (FilterTag, FilterDescription,
FilterTag_or_Description)
from apprecommender.config import Config