Commit 92049755 authored by Ricardo J. Mendez's avatar Ricardo J. Mendez

Merge branch 'feature/account-for-root-domain' into develop

Closes #5
parents 4eac6eab dd5a6697
Pipeline #13327384 passed with stage
in 1 minute and 20 seconds
......@@ -4,9 +4,9 @@ Relevance is a smart tab organizer for Chrome, written in ClojureScript.
It’ll create a natural arrangement where the tabs you have spent the longest on, which are expected to be the most relevant, are placed first, and the ones you haven’t read at all are shunted to the end of your list.
[You can read more about it here](
[You can read more about it here](, which includes a changelog.
This is Relevance 1.0.11-SNAPSHOT.
This is Relevance 1.1.0-SNAPSHOT.
# Building
(defproject relevance-chrome "1.0.11-SNAPSHOT"
(defproject relevance-chrome "1.1.0-SNAPSHOT"
:license {:name "MIT License"
:url ""}
:dependencies [[org.clojure/clojure "1.8.0"]
......@@ -4,7 +4,7 @@
[ :as data]
[ :as io]
[relevance.migrations :as migrations]
[relevance.order :refer [time-score score-tabs]]
[relevance.order :refer [time-score sort-by-root]]
[relevance.utils :refer [on-channel url-key host-key hostname is-http? ms-day]]
[relevance.settings :refer [default-settings]]
[khroma.alarms :as alarms]
......@@ -101,7 +101,7 @@
(let [{:keys [settings data]} app-state
{:keys [url-times site-times]} data
tabs (score-tabs (:tabs (<! (windows/get window-id)))
tabs (sort-by-root (:tabs (<! (windows/get window-id)))
(:require [relevance.utils :refer [url-key is-http? host-key hostname]]
"Contains functions related to data tracking and accumulation.
It does not account for the actual ordering based on this data. You should
see `relevance.order` for that."
(:require [relevance.utils :refer [url-key is-http? host-key hostname root]]
[khroma.log :as console]))
......@@ -7,7 +10,14 @@
"Accumulates the total time for a site from a hashmap of URL times.
Returns a hashmap with the URL ID as the key, and the :time, :icon and
:host string on its value."
:host string on its value.
This function differentiates between hostnames on the different root domain.
This means that and are accumulated separately.
While we could lump them together into one at this point, keeping them
separately will allows us to apply the same weight to pages on the same
hostname, which will lead to more natural ordering."
(group-by #(hostname (:url %)) (vals url-times))
......@@ -19,6 +29,19 @@
(into {})))
(defn accumulate-root-times
"Expects the hashmap resulting from `accumulate-site-times`, and returns a
new hashmap where times are accumulated by the root name.
This will let us prioritize the pages in the same root domain together,
while still keeping the per-site ordering."
(->> (group-by #(root (:host %)) (vals site-times))
(remove #(empty? (key %)))
(map #(vector (key %)
(apply + (map :time (val %)))))
(into {})))
(defn clean-up-by-time
(ns relevance.order
(:require [relevance.utils :refer [on-channel url-key host-key hostname is-http? ms-day]]))
(:require [relevance.utils :refer [on-channel url-key host-key hostname root is-http? ms-day]]
[ :refer [accumulate-root-times]]))
......@@ -15,44 +16,77 @@
(defn time-score
"Returns a score for a tab based on the total time spent at both a URL and
the site the URL belongs to."
"Returns map containing a score for a tab based on the total time spent at
both a URL and the site the URL belongs to, as well as a flag indicating
if it's a priority tab."
[tab url-times site-times settings]
(let [url (:url tab)
idx (:index tab)
url-time (or (:time (get url-times (url-key url)))
is-priority? (and (:sound-to-left? settings)
priority? (and (:sound-to-left? settings)
(:audible tab))
is-penalized? (and (not (is-http? url))
(not is-priority?))
penalized? (and (not (is-http? url))
(not priority?))
tab-time (cond
;; Add an extra score if it's a priority URL
is-priority? (+ sound-extra-score idx)
;; If a URL is penalized, we want it to at least have a
;; value of 1, otherwise the tab time gets ignored and
;; we'd default to using the raw site time
is-penalized? (max url-time 1)
priority? (+ sound-extra-score idx)
;; If a URL is penalized, disregard the time
;; and use its index (it'll get penalized later)
penalized? idx
;; ... otherwise we just go with the raw URL time
:else url-time)
site-time (or (:time (get site-times (host-key (hostname url)))) 0)
total (+ tab-time site-time)
score (if is-penalized? (* total non-http-penalty) total)]
(or (when (pos? tab-time) score)
(- site-time idx))))
host-time (or (:time (get site-times (host-key (hostname url)))) 0)
total (+ tab-time host-time)
score (if penalized? (* total non-http-penalty) total)]
;; A tab without positive time in it will use its index as a small
;; offset to the host time. That way pages from the same host are
;; lumped together, and new tabs are sorted by index.
{:score (or (when (pos? tab-time) score)
(+ host-time (* 0.1 idx)))
:priority? (not (or (nil? priority?)
(false? priority?)))}))
(defn score-tabs
(defn score-and-sort-simple
"Expects a group of tabs and associates a time score with each."
[tabs url-times site-times settings]
(->> tabs
(map #(merge % (time-score % url-times site-times settings)))
(sort-by #(- (:score %)))))
(defn sort-by-root
"Returns a hashmap of the new tab ids and their indexes, based on a tab list and
the score function for time spent on urls and sites."
[tabs url-times site-times settings]
(let [root-times (accumulate-root-times site-times)]
; (cljs.pprint/pprint tabs)
; (cljs.pprint/pprint root-times)
(->> tabs
(map #(assoc % :time (time-score % url-times site-times settings)))
(sort-by #(* -1 (:time %)))
;; We first group them by root hostname, sort the tab subgroups,
;; and then sort the groups by the time spend on the root.
(group-by #(root (hostname (:url %))))
(map (fn [[k v]]
(let [scored (score-and-sort-simple v url-times site-times settings)
root-time (get root-times k)
;; We may get a bunch of pages where the root time is 0.
;; In that case, let's sort them by their accumulated score.
idx (if (pos? root-time)
(apply + (map :score scored)))]
[(- idx)
(sort-by first)
;; Discard the root names and just flatten the list
(map second)
;; Once we have done this, we will need to take priority tabs into
;; account. Those will break from their main group and appear first.
(map-indexed #(assoc %2 :index %1))
(sort-by #(if (:priority? %)
(- (:score %))
(:index %)))
;; Ready!
(map-indexed #(hash-map :index %1
:id (:id %2)))))
:id (:id %2))))))
\ No newline at end of file
......@@ -52,6 +52,14 @@
(defn root
"Returns the root domain for a host"
(->> (string/split (lower-case (or host ""))
(take-last 2)
(string/join ".")))
(defn protocol
"Returns the protocol for a URL"
......@@ -67,7 +75,9 @@
(and (some? url)
(some? (re-find #"\bhttps?:" (protocol url)))))
(defn host-key [host]
(defn host-key
"Returns a key for a hostname, or 0 if the hostname is empty"
(if (not-empty host)
(hash-string (trim (lower-case host)))
......@@ -90,11 +100,10 @@
"Split a string using commas, semi-colons or new lines, trims the resulting
elements, and returns them as a set"
(string/split (or s "") #",|\n|;| ")
(->> (string/split (lower-case (or s ""))
#",|\n|;| ")
(map string/trim)
(remove empty?)
(map string/lower-case)
(into #{})))
(defn time-display
......@@ -437,15 +437,22 @@
{:url ""
:time 4
:ts 1446051494575
:title "New York spot price Gold..."}
{:url ""
:time 2
:ts 1446051494575
:title "New York spot price Gold..."}}
acc (data/accumulate-site-times data)]
;; There should be no empty hostnames
;; We check (get acc 0) because the result is indexed by the host-key,
;; which returns 0 on nil or empty.
(is (nil? (get acc 0)))
;; Let's verify we got the right data
(is (= {971841386 {:time 39, :icon nil, :host ""},
-915908674 {:time 4, :icon nil, :host ""}}
;; Let's verify we got the right data. Notice that accumulate-site-times
;; does not take into account differences in the root domain.
(is (= {971841386 {:time 39 :icon nil :host ""}
-915908674 {:time 4 :icon nil :host ""}
996869973 {:time 2 :icon nil :host ""}}
(testing "Accumulate site times disregards the port for the URL when accumulating"
(let [data {2080624698
......@@ -482,8 +489,40 @@
(is (= {971841386 {:time 27 :icon nil :host ""}
-915908674 {:time 4 :icon nil :host ""}
-1536293812 {:time 37 :icon nil :host ""}}
(deftest test-accumulate-root-times
(let [data {2080624698
{:url "/tags/khroma/"
:time 117
:ts 1445964037798
:title "Khroma articles"}
{:url ""
:time 27
:ts 1445964037798
:title "Open source projects"}
{:url ""
:time 12
:ts 1445964037798
:title "Khroma articles"}
{:url ""
:time 4
:ts 1446051494575
:title "New York spot price Gold..."}
{:url ""
:time 3
:ts 1446051494575
:title "New York spot price Gold..."}}
site-times (data/accumulate-site-times data)
root-times (data/accumulate-root-times site-times)]
(testing "Accumulate root times takes into lumps together pages on the same root"
(is (= {"" 39
"" 7}
(deftest test-accumulate-after-clean-up
This diff is collapsed.
......@@ -99,6 +99,17 @@
(deftest test-root
(are [host name] (= (utils/root host) name)
"" ""
"" ""
"" ""
"localhost" "localhost"
"" ""
nil ""
(deftest test-protocol
(are [url name] (= (utils/protocol url) name)
"" "https:"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment