Commit 159b46f8 authored by Lucas Moura

Imported Upstream version 0.6.1

parent 3f7bd4ce
@@ -13,22 +13,23 @@ class AppRecommender:
self.recommender = Recommender()
self.config = Config()
def make_recommendation(self):
def make_recommendation(self, print_recommendation=True):
begin_time = datetime.datetime.now()
logging.info("Computation started at %s" % begin_time)
# user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
# "desktopapps"))
user = LocalSystem()
recommendation_size = Config().num_recommendations
user_reccomendation = (self.recommender.get_recommendation(
user_recommendation = (self.recommender.get_recommendation(
user, recommendation_size))
logging.info("Recommending applications for user %s" % user.user_id)
print (user_reccomendation)
if print_recommendation:
print(user_recommendation)
end_time = datetime.datetime.now()
logging.info("Computation completed at %s" % end_time)
delta = end_time - begin_time
logging.info("Time elapsed: %d seconds." % delta.seconds)
return user_reccomendation
return user_recommendation
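The new print_recommendation argument lets a caller obtain the recommendation without writing it to stdout, which helps when the result is consumed programmatically (for instance by the data-collection flow added elsewhere in this commit). A minimal usage sketch; the import path is an assumption and is not shown in this diff:

from apprecommender.app_recommender import AppRecommender  # assumed import path

app = AppRecommender()

# Default behaviour: compute and print the recommendation.
recommendation = app.make_recommendation()

# Silent behaviour: compute only and keep the result for further processing.
recommendation = app.make_recommendation(print_recommendation=False)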
#!/bin/bash
echo "Para executar o AppRecommender as seguintes dependencias serao instaladas:"
echo ""
echo "python python-xapian python-apt python-cluster python-webpy"
echo "python-simplejson python-numpy apt-xapian-index python-xdg debtags"
echo "python-pip python-sklearn python-matplotlib python-stemmer"
echo ""
echo "Apos a instalação das dependencias os pacotes serao indexados ao xapian, que é o banco de dados utilizado pelo AppRecommender"
echo ""
cd bin/data_collect/
./install_dependencies.sh
cd -
cd bin/
echo ""
echo "Agora os dados do AppRecommender serao inicializados"
./apprec.py --init
cd data_collect/
echo ""
echo "Iniciando a coleta de dados"
./collect_user_data.py
echo ""
echo "Desinstalando as dependencias do AppRecommender"
./remove_dependencies.sh
echo ""
echo ""
echo ""
echo "Compacte o arquivo de log que está na home"
echo "o nome do arquivo comeca com 'app_recommender_log'"
echo "$ cd ~"
echo "$ tar -zcvf [nome_da_pasta].tar.gz [nome_da_pasta]"
echo ""
echo "Envie o arquivo compactado para um dos seguintes emails:"
echo "lucianopcbr@gmail.com"
echo "lucas.moura128@gmail.com"
echo ""
echo "Como titulo do e-mail utilize 'coleta de dados'"
echo ""
echo ""
echo "Obrigado por colaborar com nosso trabalho"
echo ""
echo "Att,"
echo "Lucas Moura e Luciano Prestes"
echo ""
sudo apt-get install python-xapian python-cluster python-simplejson python-numpy apt-xapian-index debtags -y
sudo update-apt-xapian-index
#!/usr/bin/env python
import commands
from load_data import get_folder_path, get_all_folders_path
def get_cross_validations_path(folders_path):
files = []
for folder_path in folders_path:
all_files = commands.getoutput(
"ls {}".format(folder_path)).splitlines()
files += [folder_path + f for f in all_files
if f.startswith('cross_validation_result')]
return files
def get_metrics_values(files_path):
metrics_values = {'S_Accuracy': [], 'Precision': [], 'Recall': [],
'FPR': [], 'F(1.0)': []}
for file_path in files_path:
with open(file_path, 'rb') as text:
lines = [line.strip() for line in text]
for line in lines:
line_split = line.split(':')
metric = line_split[0].strip()
if metric in metrics_values.keys() and len(line_split[1]) > 0:
value = float(line_split[1])
metrics_values[metric].append(value)
return metrics_values
def convert_to_csv(metrics_values):
rows = []
metrics = ';'.join(metrics_values.keys())
rows.append(metrics)
for index in range(len(metrics_values.values()[0])):
row = []
for metric in metrics_values.keys():
row.append(metrics_values[metric][index])
row = ';'.join(str(element) for element in row)
rows.append(row)
return rows
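convert_to_csv writes the metric names as a semicolon-separated header and then pairs the i-th value of every metric into one row. A small worked example with hypothetical values (column order follows the dict's iteration order under Python 2):

metrics_values = {'Precision': [0.5, 0.6], 'Recall': [0.4, 0.7]}
rows = convert_to_csv(metrics_values)
# With the iteration order Precision, Recall this yields:
# ['Precision;Recall', '0.5;0.4', '0.6;0.7']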
def main():
folder_path = get_folder_path()
all_folders_path = get_all_folders_path(folder_path)
files_path = get_cross_validations_path(all_folders_path)
metrics_values = get_metrics_values(files_path)
csv_rows = convert_to_csv(metrics_values)
for row in csv_rows:
print row
if __name__ == '__main__':
main()
import commands
import os
import sys
def get_folder_path():
usage_message = "Usage: {} [folder_path]".format(sys.argv[0])
if len(sys.argv) < 2:
print usage_message
exit(1)
folder_path = sys.argv[1]
folder_path = os.path.expanduser(folder_path)
if not folder_path.endswith('/'):
folder_path += '/'
if not os.path.exists(folder_path):
print usage_message
print "Folder do not exist"
exit(1)
return folder_path
def get_all_folders_path(folder_path):
folders_path = commands.getoutput("ls {}".format(folder_path)).splitlines()
folders_path = [folder for folder in folders_path
if folder.startswith('app_recommender_log')]
folders_path = ["{}{}/".format(folder_path, folder)
for folder in folders_path]
return folders_path
def get_csv_file_path():
usage_message = "Usage: {} [csv_file_path]".format(sys.argv[0])
if len(sys.argv) < 2:
print usage_message
exit(1)
csv_file_path = sys.argv[1]
csv_file_path = os.path.expanduser(csv_file_path)
if not os.path.exists(csv_file_path):
print usage_message
print "CSV file not exists"
exit(1)
return csv_file_path
def get_lines_from_csv_file(csv_file_path):
with open(csv_file_path, 'rb') as text:
lines = [line.strip() for line in text]
lines = [line.split(';') for line in lines]
return lines
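These helpers are shared by the analysis scripts in this commit: get_folder_path validates the directory passed on the command line and get_all_folders_path lists the collected log folders inside it. A minimal driver sketch, assuming a hypothetical path is passed as the first argument:

from load_data import get_folder_path, get_all_folders_path

folder_path = get_folder_path()  # e.g. '~/collected_logs' -> expanded, with trailing '/'
log_folders = get_all_folders_path(folder_path)
# log_folders holds every 'app_recommender_log*' subfolder, each ending in '/'
for folder in log_folders:
    print(folder)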
#!/usr/bin/env python
import commands
from load_data import get_folder_path, get_all_folders_path
def load_user_preferences(folder_path):
preferences_file = "{}user_preferences.txt".format(folder_path)
user_preferences = {}
with open(preferences_file, 'rb') as text:
lines = [line.strip() for line in text]
user_preferences = dict([(line.split(':')[0], int(line.split(':')[1]))
for line in lines])
return user_preferences
def load_strategies(folder_path):
all_files = commands.getoutput("ls {}".format(folder_path)).splitlines()
files = [f for f in all_files if f.endswith('recommendation.txt')]
strategies = {}
strategy_names = [f.split('_')[0] for f in files]
for strategy in strategy_names:
strategy_file = "{}{}_{}".format(folder_path, strategy,
'recommendation.txt')
with open(strategy_file, 'rb') as text:
strategies[strategy] = [line.strip() for line in text]
return strategies
def load_pc_informations(folder_path):
all_files = commands.getoutput("ls {}".format(folder_path)).splitlines()
files = [f for f in all_files if f.endswith('informations.txt')]
informations = {}
pc_informations_file = '{}{}'.format(folder_path, files[0])
valid_info = set(['distributor_id', 'codename'])
with open(pc_informations_file, 'rb') as text:
for line in text:
if ':' not in line:
continue
info = line.split(':')
info[0] = info[0].lower().replace(' ', '_')
if info[0] in valid_info:
informations[info[0]] = info[1].strip()
return informations
def get_strategies_score(strategies, user_preferences):
classifications = {1: 'bad', 2: 'redundant', 3: 'useful',
4: 'useful_surprise'}
strategies_score = {}
for strategy, pkgs in strategies.iteritems():
strategies_score[strategy] = {'bad': 0, 'redundant': 0, 'useful': 0,
'useful_surprise': 0}
for pkg in pkgs:
classification = classifications[user_preferences[pkg]]
strategies_score[strategy][classification] += 1
return strategies_score
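get_strategies_score maps each user rating (1 to 4) to a classification and counts, per strategy, how many recommended packages fall into each class. A tiny worked example with hypothetical package names and ratings:

user_preferences = {'gimp': 3, 'vim': 1, 'inkscape': 4}
strategies = {'cbh': ['gimp', 'vim'], 'cbml': ['inkscape']}

scores = get_strategies_score(strategies, user_preferences)
# scores == {'cbh': {'bad': 1, 'redundant': 0, 'useful': 1, 'useful_surprise': 0},
#            'cbml': {'bad': 0, 'redundant': 0, 'useful': 0, 'useful_surprise': 1}}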
def print_strategies_score(strategies_score):
classifications = ['bad', 'redundant', 'useful', 'useful_surprise']
for strategy, score in strategies_score.iteritems():
print "\nStrategy: {}".format(strategy)
for classification in classifications:
print " {}: {}".format(classification, score[classification])
print '\n'
def get_all_strategies_score(all_folders_path):
all_strategies_score = []
for folder_path in all_folders_path:
strategies = load_strategies(folder_path)
user_preferences = load_user_preferences(folder_path)
strategies_score = get_strategies_score(strategies, user_preferences)
all_strategies_score.append(strategies_score)
return all_strategies_score
def get_all_pc_informations(all_folders_path):
all_pc_informations = []
for folder_path in all_folders_path:
pc_information = load_pc_informations(folder_path)
all_pc_informations.append(pc_information)
return all_pc_informations
def convert_to_csv(all_strategies_score, all_pc_informations):
rows = []
possible_strategies = sorted(all_strategies_score[0].keys())
pc_info_header = sorted(all_pc_informations[0].keys())
classifications = ['bad', 'redundant', 'useful', 'useful_surprise']
csv_header = ""
for strategy in possible_strategies:
for classification in classifications:
csv_header += '{}_{};'.format(strategy, classification)
for info in pc_info_header:
csv_header += '{};'.format(info)
rows.append(csv_header[:-1])
for strategies_score in all_strategies_score:
row = []
for strategy, scores in sorted(strategies_score.items()):
for classification in classifications:
row.append(scores[classification])
row = ';'.join(str(element) for element in row)
rows.append(row)
index = 1
for pc_informations, row in zip(all_pc_informations, rows[1:]):
distributor_id = pc_informations['distributor_id']
codename = pc_informations['codename']
row = row + ';{};{}'.format(codename, distributor_id)
rows[index] = row
index += 1
return rows
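Each data row therefore carries the four classification counts per strategy, followed by the machine's codename and distributor id. A worked example with hypothetical inputs (one submission, one strategy):

all_strategies_score = [{'cbh': {'bad': 1, 'redundant': 0,
                                 'useful': 1, 'useful_surprise': 0}}]
all_pc_informations = [{'distributor_id': 'Debian', 'codename': 'jessie'}]

rows = convert_to_csv(all_strategies_score, all_pc_informations)
# rows == ['cbh_bad;cbh_redundant;cbh_useful;cbh_useful_surprise;codename;distributor_id',
#          '1;0;1;0;jessie;Debian']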
def main():
folder_path = get_folder_path()
all_folders_path = get_all_folders_path(folder_path)
all_strategies_score = get_all_strategies_score(all_folders_path)
all_pc_informations = get_all_pc_informations(all_folders_path)
csv_rows = convert_to_csv(all_strategies_score, all_pc_informations)
for row in csv_rows:
print row
if __name__ == '__main__':
main()
#!/usr/bin/env python
import matplotlib.pyplot as plt
import numpy as np
from load_data import get_csv_file_path, get_lines_from_csv_file
def plot_cross_validation_averages(metrics_values):
values_plot = []
metrics_plot = []
for metric, values in metrics_values.iteritems():
metrics_plot.append(metric)
values_plot.append(values)
fig = plt.figure()
width = .35
ind = np.arange(len(values_plot))
plt.bar(ind, values_plot, width=width)
plt.xticks(ind + width / 2, metrics_plot)
plt.yticks(np.arange(0.0, 1.1, 0.1))
for a, b in zip(ind, values_plot):
plt.text(a + 0.17, b + 0.02, str(b)[0:5], ha='center')
fig.autofmt_xdate()
plt.show()
def load_csv_file(csv_file_path):
lines = get_lines_from_csv_file(csv_file_path)
metrics = [metric for metric in lines[0]]
values = [map(float, line) for line in lines[1:]]
return values, metrics
def get_metrics_values(values, metrics):
metrics_values = dict((metric, 0) for metric in metrics)
for i in range(len(values)):
for j in range(len(values[i])):
metrics_values[metrics[j]] += values[i][j]
for metric in metrics:
metrics_values[metric] /= len(values)
return metrics_values
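get_metrics_values sums every column of the CSV and divides by the number of rows, i.e. it computes the average of each metric over all cross-validation rounds. A small worked example with hypothetical values:

metrics = ['Precision', 'Recall']
values = [[0.5, 0.4],
          [0.7, 0.6]]

averages = get_metrics_values(values, metrics)
# averages == {'Precision': 0.6, 'Recall': 0.5}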
def main():
csv_file_path = get_csv_file_path()
values, metrics = load_csv_file(csv_file_path)
metrics_values = get_metrics_values(values, metrics)
plot_cross_validation_averages(metrics_values)
if __name__ == '__main__':
main()
#!/usr/bin/env python
import matplotlib.pyplot as plt
import numpy as np
from load_data import get_csv_file_path, get_lines_from_csv_file
STRATEGIES = ['cbh', 'cbml', 'cbtm']
CLASSIFICATIONS = ['Bad', 'Redundant', 'Useful', 'Useful Surprise']
def autolabel(ax, rects, string_format):
for rect in rects:
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width() / 2.,
1.02 * height, string_format % height,
ha='center', va='bottom')
def plot_strategies_score(strategies_score, classifications, title, ylabel,
plot_min, plot_max, plot_step, string_format='%d'):
colors = ['red', 'orange', 'yellow', 'green']
groups_number = len(strategies_score)
ind = np.arange(groups_number)
width = 0.2
rects = []
fig, ax = plt.subplots()
for index, classification in enumerate(classifications):
values = []
for _, score in strategies_score.iteritems():
values.append(score[index])
rects.append(ax.bar(ind + (width * index), values, width,
color=colors[index]))
ax.set_ylabel(ylabel)
ax.set_title(title)
ax.set_xticks(ind + width)
ax.set_xticklabels(strategies_score.keys())
ax.legend([r[0] for r in rects], classifications)
for rect in rects:
autolabel(ax, rect, string_format)
plt.yticks(np.arange(plot_min, plot_max, plot_step))
plt.show()
def load_csv_file(csv_file_path):
lines = get_lines_from_csv_file(csv_file_path)
scores = []
for line in lines[1:]:
begin = 0
for strategy in STRATEGIES:
score = [strategy] + map(int, line[begin: begin + 4])
begin += 4
scores.append(score)
return scores, CLASSIFICATIONS
def get_sum_of_scores(scores):
sum_scores = {}
for score in scores:
classification = score[0]
if classification not in sum_scores.keys():
sum_scores[classification] = [0] * (len(score) - 1)
for index, value in enumerate(score[1:]):
sum_scores[classification][index] += value
return sum_scores
def main():
csv_file_path = get_csv_file_path()
scores, classifications = load_csv_file(csv_file_path)
sum_scores = get_sum_of_scores(scores)
plot_strategies_score(sum_scores, classifications,
'Amount by classification', 'Amount', 0.0, 55.0,
5.0)
if __name__ == '__main__':
main()
@@ -15,10 +15,10 @@ class LoadOptions(Singleton):
def load(self):
config = Config()
short_options = 'hdvo:f:b:a:e:p:m:u:l:c:x:w:s:z:r:n:idvo:tdvo'
long_options = ['help', 'debug', 'verbose', 'kmedoids=', 'maxpopcon=',
'weight=', 'strategy=', 'profile_size=', 'init',
'train', 'because', 'nrecommendation']
short_options = 'hdvo:d:v:s:z:idvo:tdvo:b:n:cdvo'
long_options = ['help', 'debug', 'verbose', 'strategy=',
'profile_size=', 'init', 'train', 'because',
'nrecommendation', 'contribute']
try:
opts, args = getopt.getopt(sys.argv[1:], short_options,
long_options)
@@ -49,6 +49,8 @@ class LoadOptions(Singleton):
config.because = True
elif o in ('-n', '--num-recommendations'):
config.num_recommendations = int(p)
elif o in ('-c', '--contribute'):
continue
else:
assert False, "unhandled option"
@@ -68,38 +70,10 @@ class LoadOptions(Singleton):
" generated a given recommendation"
print " -d, --debug Set logging level to debug"
print " -v, --verbose Set logging level to verbose"
print " -o, --output=PATH Path to file to save output"
print ""
print " [ data sources ]"
print " -f, --filtersdir=PATH Path to filters directory"
print " -b, --pkgsfilter=FILTER File containing packages" \
"to be considered for recommendations"
print " -a, --axi=PATH Path to apt-xapian-index"
print " -p, --popconindex=PATH Path to popcon index"
print " -e, --dde=URL DDE url"
# deprecated options
# print " -m, --popcondir=PATH Path to popcon submissions dir"
# print " -u, --indexmode=MODE " \
# "'old'|'reindex'|'cluster'|'recluster'"
# print " -l, --clustersdir=PATH Path to popcon clusters dir"
# print " -c, --medoids=k " \
# "Number of medoids for clustering"
# print " -x, --maxpopcon=k " \
# "Number of submissions to be considered"
print ""
print " [ recommender ]"
print " -w, --weight=OPTION Search weighting scheme"
print " -s, --strategy=OPTION Recommendation strategy"
print " -z, --profilesize=k Size of user profile"
print " -r, --profiling=OPTION Profile filter strategy"
print " -n, --neighbors=k " \
"Size of neighborhood for collaboration"
print ""
print " [ weight options ] "
print " trad = traditional probabilistic weighting"
print " bm25 = bm25 weighting scheme"
print ""
print " [ strategy options ] "
print " cb = content-based, mixed profile"
@@ -111,15 +85,9 @@ class LoadOptions(Singleton):
print " cbt_eset = cbt with eset profiling"
print " cbd_eset = cbd_eset with eset profiling"
print " cbh_eset = cbh with eset profiling"
print " knn = collaborative, tf-idf knn"
print " knn_plus = collaborative, tf-idf weighted knn"
print " knn_eset = collaborative, eset knn"
print " knnco = collaborative through content"
print " knnco_eset = collaborative through content," \
" eset recommendation"
print " mlbva = machine_learning, Binary Vector Approach"
print " mlbow = machine_learning, Bag Of Words"
print ""
print " [ to train machine learning ] "
print " on path '/bin' run the script" \
" 'apprec_ml_traning.py'"
print " [ contribute with AppRecommender ]"
print " -c, --contribute classify recommendations" \
" helping AppRecommender to improve recommendations"
@@ -29,6 +29,7 @@ from apprecommender.load_options import LoadOptions
from apprecommender.config import Config
from apprecommender.strategy import (MachineLearning, MachineLearningBVA,
MachineLearningBOW)
from apprecommender.data_collect import collect_user_data
SUCCESS = 0
ERROR_INIT = 1
@@ -70,6 +71,8 @@ def run():
MachineLearning.train(MachineLearningBVA)
MachineLearning.train(MachineLearningBOW)
return SUCCESS
elif check_for_flag(options, '-c', '--contribute'):
collect_user_data.main()
else:
return run_apprecommender(load_options.options)