Commit f7dc55a5 authored by Lucas Moura's avatar Lucas Moura

Imported Upstream version 0.5.2

parents
*.pyc
TODO
doc/doxygen-1.7.3
doc/html/
popcon-data.tgz
popcon-data/
psql-out
udd-import.log
udd.sql.gz
*.log
.vagrant/
*.swp
apprecommender.egg-info/
apprecommender/tests/test_data/.sample_axi/
apprecommender/tests/test_data/.sample_axi/
apprecommender/web/submissions/
This diff is collapsed.
AppRecommender - Application recommender for GNU/Linux systems
---------------------------------------------------------------
Install dependencies
---------------------
# apt-get install python python-xapian python-apt python-cluster python-webpy python-simplejson python-numpy apt-xapian-index python-xdg debtags python-pip python-sklearn python-nltk python-matplotlib -y
sudo update-apt-xapian-index
pip install setuptools
Run AppRecommender web UI
--------------------------
$ cd ./src/web
$ ./server.py
Open a browser and access http://localhost:8080
More info at https://github.com/tassia/AppRecommender/wiki
Run AppRecommender in Terminal
------------------------------
$ cd ./bin
$ ./apprec.py -s cb
Run "$ ./apprec.py -h" to view the recommender strategies
Prepare AppRecommender data
---------------------------
Run the following commands:
$ ./install_dependencies.sh
$ cd ./bin
$ ./apprec.py --init
Run Machine Learning Training
----------------------------
$ cd ./bin
$ ./apprec.py --train
# -*- mode: ruby -*-
# vi: set ft=ruby :
# Vagrant definition of a development virtual machine for AppRecommender.
Vagrant.configure(2) do |config|
# Base box the virtual machine is created from.
config.vm.box = "debian/jessie64"
# Do not look for a newer version of the box on every "vagrant up".
config.vm.box_check_update = false
# Forward guest port 8080 to the same port on the host.
config.vm.network :forwarded_port, host: 8080, guest: 8080
# Provision the machine with the bootstrap shell script (privileged: false).
config.vm.provision :shell, path: "vagrant/bootstrap.sh", privileged: false
# Resources given to the machine under the VirtualBox provider.
config.vm.provider "virtualbox" do |vm|
vm.memory = 1024
vm.cpus = 2
end
end
# Config file for AppRecommender
[general]
# logging level
debug = 0
verbose = 0
# output file
output = apprec.log
# output = /dev/null
[data_sources]
base_dir = ~/.app-recommender/
user_data_dir = user_data/
# filters for valid packages
filters_dir = filters
pkgs_filter = desktopapps
# package information indexes
axi = /var/lib/apt-xapian-index/index
axi_programs = axi_programs
axi_desktopapps = axi_desktopapps
# old, reindex, cluster, recluster
#index_mode = old
# popcon indexes
# check if there are popcon indexes available
popcon = 0
popcon_programs = popcon_programs
popcon_desktopapps = popcon_desktopapps
popcon_index = popcon_desktopapps
popcon_dir = popcon-entries
# maximum number of popcon submissions to index
max_popcon = 100000000
# popcon clustering
clusters_dir = clusters_dir_full
k_medoids = 100
# Connection to DDE
dde_url = http://46.4.235.200:8000/q/udd/packages/all/%s?t=json
dde_server = 46.4.235.200
dde_port = 8000
[recommender]
# search weighting scheme ('trad' or 'bm25')
weight = bm25
# bm25 parameters
bm25_k1 = 1.2
bm25_k2 = 0
bm25_k3 = 7
bm25_b = 0.75
bm25_nl = 0.5
# recommendation strategy
strategy = cb
# user content profile size
profile_size = 50
# neighborhood size
k_neighbors = 50
popcon_profiling = full
# Generate the project documentation by running the generate_doc.sh script.
doc:
./generate_doc.sh
# Delete every compiled Python bytecode file (*.pyc) below the current directory.
clean :
@find . -name \*.pyc -delete
# \mainpage Application recommender for GNU/Linux systems
# Main repository: http://github.com/tassia/AppRecommender
#!/usr/bin/env python
import logging
import datetime
from apprecommender.recommender import Recommender
from apprecommender.user import LocalSystem
class AppRecommender:
    """
    Entry point that computes package recommendations for the local system.
    """

    def __init__(self):
        """
        Create the recommender engine used by make_recommendation().
        """
        self.recommender = Recommender()

    def make_recommendation(self, recommendation_size):
        """
        Compute a recommendation for the user of the local system.

        The start time, the end time and the elapsed time of the computation
        are written to the log, together with the recommendation itself.

        recommendation_size -- size argument forwarded unchanged to
                               Recommender.get_recommendation().

        Return whatever Recommender.get_recommendation() returned.
        """
        begin_time = datetime.datetime.now()
        logging.info("Computation started at %s", begin_time)
        # Alternative user source kept for reference:
        # user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
        #                                                 "desktopapps"))
        user = LocalSystem()
        user_recommendation = self.recommender.get_recommendation(
            user, recommendation_size)

        logging.info("Recommending applications for user %s", user.user_id)
        logging.info(user_recommendation)

        end_time = datetime.datetime.now()
        logging.info("Computation completed at %s", end_time)
        delta = end_time - begin_time
        # timedelta.seconds only holds the sub-day remainder; total_seconds()
        # reports the whole elapsed time.
        logging.info("Time elapsed: %d seconds.", delta.total_seconds())

        return user_recommendation
#!/usr/bin/env python
"""
config - python module for configuration options.
"""
__author__ = "Tassia Camoes Araujo <tassia@gmail.com>"
__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo"
__license__ = """
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
import os
import logging
import logging.handlers
from ConfigParser import ConfigParser, MissingSectionHeaderError
from apprecommender.singleton import Singleton
class Config(Singleton):
    """
    Class to handle configuration options.

    Hard-coded defaults are set first and then overridden by any value found
    in the configuration files. The defaults, the file loading and the logger
    set-up run only once, guarded by the 'initialized' attribute.
    """

    def __init__(self):
        """
        Parse the configuration files and, on first use, set the default
        options, apply the file overrides and configure the logger.

        The process is aborted if a configuration file has no section header.
        """
        try:
            self.config_parser = ConfigParser()
            # Files that do not exist are silently skipped by read().
            self.config_parser.read(
                ['/etc/apprecommender/recommender.conf',
                 os.path.expanduser('~/.app_recommender.rc'),
                 os.path.expanduser('app_recommender.cfg')])
        except MissingSectionHeaderError as err:
            logging.error("Error in config file syntax: %s", str(err))
            os.abort()

        if not hasattr(self, 'initialized'):
            # data_source options
            self.base_dir = os.path.expanduser('~/.app-recommender')
            self.user_data_dir = os.path.join(self.base_dir, "user_data/")
            # general options
            self.debug = 0
            self.verbose = 1
            self.output = os.path.join(self.base_dir, "apprec.log")
            # filters for valid packages
            self.filters_dir = os.path.join(self.base_dir, "filters")
            self.pkgs_filter = os.path.join(self.filters_dir, "desktopapps")
            # package information indexes
            self.axi = "/var/lib/apt-xapian-index/index"
            self.axi_programs = os.path.join(self.base_dir, "axi_programs")
            self.axi_desktopapps = os.path.join(self.base_dir,
                                                "axi_desktopapps")
            # popcon indexes
            self.index_mode = "old"
            # check if there are popcon indexes available
            self.popcon = 0
            self.popcon_programs = os.path.join(self.base_dir,
                                                "popcon_programs")
            self.popcon_desktopapps = os.path.join(self.base_dir,
                                                   "popcon_desktopapps")
            self.popcon_index = self.popcon_desktopapps
            self.popcon_dir = os.path.join(self.base_dir, "popcon-entries")
            self.max_popcon = 1000
            # popcon clustering
            self.clusters_dir = os.path.join(self.base_dir, "clusters-dir")
            self.k_medoids = 100
            # self.dde_url = "http://dde.debian.net/dde/" \
            #                "q/udd/packs/all/%s?t=json"
            self.dde_url = "http://46.4.235.200:8000/" \
                           "q/udd/packages/prio-debian-sid/%s?t=json"
            self.dde_server = "46.4.235.200"
            self.dde_port = 8000

            # recommender options
            self.strategy = "cb"
            self.weight = "bm25"
            self.bm25_k1 = 1.2
            self.bm25_k2 = 0
            self.bm25_k3 = 7
            self.bm25_b = 0.75
            self.bm25_nl = 0.5
            # user content profile size
            self.profile_size = 10
            # neighborhood size
            self.k_neighbors = 50
            # popcon profiling method: full, voted
            self.popcon_profiling = "full"

            self.load_config_file()
            self.set_logger()
            self.initialized = 1
            logging.info("Basic config")

    def read_option(self, section, option):
        """
        Read option from configuration file if it is defined there or return
        default value.

        The default is the current value of the attribute named like the
        option. Values coming from the file are always strings.
        """
        if self.config_parser.has_option(section, option):
            return self.config_parser.get(section, option)
        # Plain attribute lookup; no need to evaluate generated code.
        return getattr(self, option)

    def load_config_file(self):
        """
        Override the default options with the values found in the
        configuration files, converting each one to its expected type.

        Path options are joined to base_dir; an absolute value is kept as is
        because os.path.join() discards the preceding components.
        """
        self.debug = int(self.read_option('general', 'debug'))
        # 'verbose' has its own attribute; it must not overwrite 'debug'.
        self.verbose = int(self.read_option('general', 'verbose'))
        self.base_dir = os.path.expanduser(
            self.read_option('data_sources', 'base_dir'))
        self.user_data_dir = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'user_data_dir'))
        self.output = os.path.join(
            self.base_dir, self.read_option('general', 'output'))
        self.filters_dir = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'filters_dir'))
        self.pkgs_filter = os.path.join(
            self.filters_dir, self.read_option('data_sources',
                                               'pkgs_filter'))
        self.axi = self.read_option('data_sources', 'axi')
        self.axi_programs = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'axi_programs'))
        self.axi_desktopapps = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'axi_desktopapps'))
        # self.index_mode = self.read_option('data_sources', 'index_mode')
        self.popcon = int(self.read_option('data_sources', 'popcon'))
        self.popcon_programs = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'popcon_programs'))
        self.popcon_desktopapps = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'popcon_desktopapps'))
        self.popcon_index = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'popcon_index'))
        self.popcon_dir = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'popcon_dir'))
        self.max_popcon = int(self.read_option('data_sources', 'max_popcon'))
        self.clusters_dir = os.path.join(
            self.base_dir, self.read_option('data_sources',
                                            'clusters_dir'))
        self.k_medoids = int(self.read_option('data_sources', 'k_medoids'))
        self.dde_url = self.read_option('data_sources', 'dde_url')
        self.dde_server = self.read_option('data_sources', 'dde_server')
        # Keep the port an integer, like its default, even when it is read
        # from the file as a string.
        self.dde_port = int(self.read_option('data_sources', 'dde_port'))
        self.weight = self.read_option('recommender', 'weight')
        self.bm25_k1 = float(self.read_option('recommender', 'bm25_k1'))
        self.bm25_k2 = float(self.read_option('recommender', 'bm25_k2'))
        self.bm25_k3 = float(self.read_option('recommender', 'bm25_k3'))
        self.bm25_b = float(self.read_option('recommender', 'bm25_b'))
        self.bm25_nl = float(self.read_option('recommender', 'bm25_nl'))
        self.strategy = self.read_option('recommender', 'strategy')
        self.profile_size = int(
            self.read_option('recommender', 'profile_size'))
        self.k_neighbors = int(
            self.read_option('recommender', 'k_neighbors'))
        self.popcon_profiling = self.read_option(
            'recommender', 'popcon_profiling')

    def set_logger(self):
        """
        Configure application logger and log level.

        Messages go both to standard output and to a rotating log file at
        self.output. The level is DEBUG when debug is 1, INFO when verbose
        is 1 and WARNING otherwise.
        """
        self.logger = logging.getLogger('')  # root logger is used by default
        # The root logger lets everything through; handlers do the filtering.
        self.logger.setLevel(logging.DEBUG)

        if self.debug == 1:
            log_level = logging.DEBUG
        elif self.verbose == 1:
            log_level = logging.INFO
        else:
            log_level = logging.WARNING

        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(logging.Formatter(
            '%(levelname)s: %(message)s'))
        console_handler.setLevel(log_level)
        self.logger.addHandler(console_handler)

        # The log file lives under base_dir by default, so make sure it exists.
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)

        file_handler = logging.handlers.RotatingFileHandler(self.output,
                                                            maxBytes=50000000,
                                                            backupCount=5)
        log_format = '%(asctime)s %(levelname)-8s %(message)s'
        file_handler.setFormatter(logging.Formatter(
            log_format, datefmt='%Y-%m-%d %H:%M:%S'))
        file_handler.setLevel(log_level)
        self.logger.addHandler(file_handler)
        logging.info("Set up logger")
This diff is collapsed.
#!/usr/bin/env python
import commands
import calendar
import logging
import math
import operator
import time
pkgs_times = {}
pkgs_time_weight = {}
best_weight_terms = {}
user_tfidf_weights = {}
def pkg_name_with_error(pkg):
    """
    Tell whether a package name is malformed, i.e. whether it is made of
    more than one whitespace-separated word.
    """
    words = pkg.split()
    return len(words) > 1
def get_time_from_package(pkg, pkg_bin=True):
    """
    Return the [modify, access] times of a package, using the module cache.

    A malformed package name is cached as [None, None]. Any other name is
    looked up with get_time() the first time it is seen and served from the
    pkgs_times cache afterwards.

    pkg_bin -- forwarded to get_time().
    """
    if pkg_name_with_error(pkg):
        pkgs_times[pkg] = [None, None]
    elif pkg not in pkgs_times:
        # 'Y' is queried before 'X', as the list is built left to right.
        pkgs_times[pkg] = [get_time('Y', pkg, pkg_bin),
                           get_time('X', pkg, pkg_bin)]

    return pkgs_times[pkg]
def get_alternative_pkg(pkg):
    """
    Find a file installed by the package that can stand in for it.

    The files the package installs under /usr/bin/ are listed with dpkg;
    when that yields nothing, the ones under /usr/sbin/ are listed instead.

    Return the listed path whose access time, as reported by get_time(),
    sorts first, or None when the listing is empty.
    """
    listing_command = ("dpkg -L {0}| grep /usr/bin/"
                       " || dpkg -L {0}| grep /usr/sbin/")
    listing = commands.getoutput(listing_command.format(pkg))

    access_times = dict((path, get_time('X', path))
                        for path in listing.splitlines())
    if not access_times:
        return None

    ranked = sorted(access_times.items(), key=operator.itemgetter(1))
    first_path, _ = ranked[0]
    return first_path
def get_time(option, pkg, pkg_bin=True):
    """
    Run stat on a file and return the value of one of its format fields.

    option  -- letter of the stat format sequence to print, e.g. 'Y' or 'X'.
    pkg     -- command name, or file path when pkg_bin is false.
    pkg_bin -- when true, the file to inspect is located with `which`.

    Return the command output as a string, or None when that output contains
    an error message from stat.
    """
    target = " `which {package}`" if pkg_bin else " {package}"
    command = ("stat -c '%{option}'" + target).format(option=option,
                                                      package=pkg)
    output = commands.getoutput(command)

    return None if 'stat:' in output else output
def linear_percent_function(modify, access, time_now):
    """
    Return how far the last access lies between the last modification and
    the present, as a ratio: (access - modify) / (time_now - modify).

    modify, access -- timestamps, as integers or numeric strings.
    time_now       -- current timestamp, as a number.

    Return 0.0 when the modification time equals time_now, since the ratio
    is undefined over an empty interval.
    """
    modify, access = int(modify), int(access)

    time_access = access - modify
    time_actual = time_now - modify

    if time_actual == 0:
        # Avoid a ZeroDivisionError for a file modified this very second.
        return 0.0

    return float(time_access) / float(time_actual)
def get_pkg_time_weight(pkg):
    """
    Return the time weight of a package: the ratio computed by
    linear_percent_function() from its modify and access times.

    When the times of the package itself cannot be read, the file returned
    by get_alternative_pkg() is tried instead.

    Return 0 when no usable pair of timestamps can be found.
    """
    modify, access = get_time_from_package(pkg)

    if not (modify and access):
        alternative = get_alternative_pkg(pkg)
        if alternative is None:
            # No installed file to fall back on.
            return 0

        modify, access = get_time_from_package(alternative)
        if not (modify and access):
            return 0

    time_now = calendar.timegm(time.gmtime())

    return linear_percent_function(modify, access, time_now)
def calculate_time_curve(pkg_time_weight):
    """
    Map a package time weight onto an exponential curve.

    The result is 10 / exp(1 - pkg_time_weight), so a weight of 1 yields 10.
    A weight that is zero or missing yields 0.
    """
    if pkg_time_weight:
        amplitude = 10
        decay_rate = 1
        exponent = (1 - pkg_time_weight) * decay_rate
        return amplitude * (1 / math.exp(exponent))

    return 0
def time_weight(term, term_list):
    """
    Compute the time weight of a term from the packages related to it.

    Each package contributes the curve value of its time weight. The five
    highest contributions are averaged; when there are fewer than five, the
    list is padded with values decreasing by 0.2 from the lowest one.

    term      -- term the weight is recorded under in best_weight_terms.
    term_list -- packages related to the term.

    Return the weight of the term, or 0.0 when term_list is empty.
    """
    weight_len = 5
    weight_delta = 0.2

    weights = []
    for pkg in term_list:
        if pkg not in pkgs_time_weight:
            pkgs_time_weight[pkg] = get_pkg_time_weight(pkg)
        # The cache holds raw ratios, so the curve is applied to cached and
        # freshly computed values alike.
        weights.append(calculate_time_curve(pkgs_time_weight[pkg]))

    if not weights:
        # Nothing to average and no value to pad from.
        best_weight_terms[term] = 0.0
        return 0.0

    weights.sort(reverse=True)
    while len(weights) < weight_len:
        weights.append(weights[-1] - weight_delta)

    term_weight = float(sum(weights[:weight_len])) / float(weight_len)
    best_weight_terms[term] = term_weight

    return term_weight
def print_best_weight_terms(terms_package):
    """
    Log the ten terms with the highest time weight.

    For each of them the weight is logged, followed by up to six of its
    packages with their time weight and alternative file.

    terms_package -- mapping from a term to the packages related to it.
    """
    max_terms = 10
    max_pkgs_per_term = 6

    logging.info("BEST TERMS")
    ranked_terms = sorted(best_weight_terms, key=best_weight_terms.get,
                          reverse=True)

    for term in ranked_terms[:max_terms]:
        logging.info("\n")
        # The message needs placeholders for the values passed as arguments.
        logging.info("%s %s", term, best_weight_terms[term])
        logging.info('-')

        for shown, pkg in enumerate(terms_package[term]):
            if shown >= max_pkgs_per_term:
                break
            logging.info("[{0}: {1} {2}]\n".format(pkg,
                         get_pkg_time_weight(pkg),
                         get_alternative_pkg(pkg)))
#!/usr/bin/env python
import re
import xapian
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
class PkgMatchDecider(xapian.MatchDecider):
    """
    Match decider that rejects the packages found in a given list.
    """

    def __init__(self, pkgs_list):
        """
        Keep the list of packages that must not be matched.
        """
        xapian.MatchDecider.__init__(self)
        self.pkgs_list = pkgs_list

    def __call__(self, doc):
        """
        Decide whether the package stored as the document data is accepted.

        A package is new when it is not in pkgs_list and its name has no ':'.
        A name containing "kde" or "gnome" is accepted only when it is new
        and that same word is an entry of pkgs_list. Any other name is
        rejected when it starts with "lib" or ends with "doc", and accepted
        when it is new otherwise.
        """
        pkg = doc.get_data()
        is_new = pkg not in self.pkgs_list and ':' not in pkg

        # Desktop-specific packages are decided before the lib/doc filter.
        for desktop in ("kde", "gnome"):
            if desktop in pkg:
                return is_new and desktop in self.pkgs_list

        if re.match(r'^lib.*', pkg) or re.match(r'.*doc$', pkg):
            return False

        return is_new
class PkgExpandDecider(xapian.ExpandDecider):
    """
    Extend xapian.ExpandDecider to consider packages only.
    """

    def __init__(self, pkgs_list):
        """
        Set initial parameters.

        pkgs_list -- packages that must not be accepted as expansion terms.
        """
        xapian.ExpandDecider.__init__(self)
        self.pkgs_list = pkgs_list

    def __call__(self, term):
        """
        True if the term is a package ("XP" prefix) that is not in pkgs_list.

        A package whose name contains "kde" or "gnome" is accepted only when
        that same word is also an entry of pkgs_list.
        """
        prefix = "XP"
        if not term.startswith(prefix):
            return False

        # Remove exactly the prefix; lstrip("XP") would also eat any further
        # leading 'X' or 'P' that belongs to the package name.
        pkg = term[len(prefix):]
        is_new_pkg = pkg not in self.pkgs_list

        if "kde" in pkg:
            return is_new_pkg and "kde" in self.pkgs_list
        if "gnome" in pkg:
            return is_new_pkg and "gnome" in self.pkgs_list

        return is_new_pkg
class TagExpandDecider(xapian.ExpandDecider):
    """
    Expand decider that accepts only terms carrying the "XT" prefix.
    """

    def __call__(self, term):
        """
        Return True when the term starts with "XT", False otherwise.
        """
        tag_prefix = "XT"
        return term.startswith(tag_prefix)
class FilterTag(xapian.ExpandDecider):
"""
Extend xapian.ExpandDecider to consider only tag terms.
"""
def __init__(self, valid_tags):
"""
Set initial parameters.