Commit a2524205 authored by Ricardo J. Mendez

Merge branch 'feature/ignore-list' into develop

parents 12547594 9596d91a
......@@ -4,6 +4,7 @@
[relevance.io :as io]
[relevance.migrations :as migrations]
[relevance.utils :refer [on-channel url-key host-key hostname is-http? ms-day]]
[relevance.settings :refer [default-settings]]
[khroma.alarms :as alarms]
[khroma.context-menus :as menus]
[khroma.idle :as idle]
......@@ -32,7 +33,6 @@
(defn now
  "Returns the current time as given by JavaScript's Date.now."
  []
  (js/Date.now))
;;;;-------------------------------------
;;;; Functions
;;;;-------------------------------------
......@@ -118,7 +118,7 @@
::initialize
(fn [_]
(go
(dispatch [:data-load (<! (io/load))])
(dispatch [:data-load (<! (io/load :data)) (or (<! (io/load :settings)) default-settings)])
(dispatch [::window-focus {:windowId (:id (<! (windows/get-last-focused {:populate false})))}])
;; We should only hook to the channels once, so we do it during the :initialize handler
(hook-to-channels)
......@@ -136,27 +136,32 @@
(register-handler
:data-load
(fn [app-state [_ loaded]]
(let [migrated (migrations/migrate-to-latest loaded)
t (now)
new-urls (->
(:url-times migrated)
(data/time-clean-up (- t (* 7 ms-day)) 30)
(data/time-clean-up (- t (* 14 ms-day)) 90)
(data/time-clean-up (- t (* 30 ms-day)) 300))
site-data (:site-times migrated)
new-sites (if (not= new-urls (:url-times migrated))
(->>
;; Accumulate site times but preserve the icons we had before
(data/accumulate-site-times new-urls)
(map #(vector (key %)
(assoc (val %) :icon (get-in site-data [(key %) :icon]))))
(into {}))
site-data)
new-data (assoc migrated :url-times new-urls :site-times new-sites)]
; (console/trace "Data load" loaded "migrated" new-data)
(fn [app-state [_ data settings]]
(let [migrated (migrations/migrate-to-latest data)
t (now)
ignore-set (:ignore-set settings)
new-urls (->
(:url-times migrated)
(data/clean-up-by-time (- t (* 7 ms-day)) 30)
(data/clean-up-by-time (- t (* 14 ms-day)) 90)
(data/clean-up-by-time (- t (* 30 ms-day)) 300)
(data/clean-up-ignored ignore-set))
site-data (:site-times migrated)
new-sites (if (not= new-urls (:url-times migrated))
(->>
;; Accumulate site times but preserve the icons we had before
(data/accumulate-site-times new-urls)
(map #(vector (key %)
(assoc (val %) :icon (get-in site-data [(key %) :icon]))))
(into {}))
site-data)
new-data (assoc migrated :url-times new-urls :site-times new-sites)]
; (console/trace "Data load" data "migrated" new-data)
; (console/trace "Settings" settings)
;; Save the migrated data we just received
(io/save new-data)
;; We don't save the settings, since the background script does not really change them.
;; That's the UI's domain.
(io/save :data new-data)
;; Process the suspend info
(let [suspend-info (:suspend-info new-data)
old-tab (:active-tab suspend-info)
......@@ -167,15 +172,16 @@
(if is-same?
(dispatch [:handle-activation old-tab (:start-time old-tab)])
(dispatch [:handle-deactivation old-tab (:time suspend-info)])))
(-> app-state
(assoc :data (dissoc new-data :suspend-info))))))
(assoc app-state :data (dissoc new-data :suspend-info)
:settings settings)
)))
(register-handler
:data-set
(fn [app-state [_ key item]]
(let [new-state (assoc-in app-state [:data key] item)]
(io/save (:data new-state))
(io/save :data (:data new-state))
new-state)
))
......@@ -262,7 +268,7 @@
(fn [app-state [_ {:keys [message sender]}]]
; (console/log "GOT INTERNAL MESSAGE" message "from" sender)
(condp = (keyword message)
:reload-data (go (dispatch [:data-load (<! (io/load))])))
:reload-data (go (dispatch [:data-load (<! (io/load :data)) (<! (io/load :settings))])))
app-state
))
......@@ -336,7 +342,7 @@
site-times (data/track-site-time (or (:site-times data) {}) tab (quot time 1000) (now))
new-data (assoc data :url-times url-times :site-times site-times)]
; (console/trace time " milliseconds spent at " tab)
(io/save new-data)
(io/save :data new-data)
(assoc app-state :data new-data)
)))
......
(ns relevance.data
(:require [relevance.utils :refer [url-key host-key hostname]]))
(:require [relevance.utils :refer [url-key host-key hostname]]
[khroma.log :as console]))
(defn accumulate-site-times
......@@ -18,7 +19,7 @@
)
(defn time-clean-up
(defn clean-up-by-time
"Removes from url-times all the items that are older than cut-off-ts
and which were viewed for less than min-seconds"
[url-times cut-off-ts min-seconds]
......@@ -27,11 +28,19 @@
(< (:time (val %)) min-seconds))
url-times)))
(defn clean-up-ignored
  "Returns a map with the entries of url-times, minus every entry whose
  :url has a hostname that is a member of ignore-set."
  [url-times ignore-set]
  (letfn [(ignored? [[_ item]]
            ;; An entry is ignored when the hostname of its URL is in the set
            (contains? ignore-set (hostname (:url item))))]
    (into {} (remove ignored? url-times))))
(defn track-url-time
"Receives a url time database, a tab record and a time to track, and returns
new time database which is the result of adding the time to the URL. It also
timestamps the record with the timestamp received."
[url-times tab time timestamp]
[url-times tab time timestamp & {:keys [ignore-set]}]
(let [url (or (:url tab) "")
id (url-key url)
url-item (or (get url-times id)
......@@ -39,6 +48,7 @@
:time 0
:ts 0})
track? (and (not= 0 id)
(not (contains? ignore-set (hostname url)))
(< 0 time))
new-item (assoc url-item :time (+ (:time url-item) time)
:title (:title tab)
......@@ -53,7 +63,7 @@
new time database which is the result of adding the time to the site. It also
timestamps the record with the timestamp received, and adds the favIconUrl of
the tab as the one for the entire site."
[site-times tab time timestamp]
[site-times tab time timestamp & {:keys [ignore-set]}]
(let [host (hostname (or (:url tab) ""))
id (host-key host)
site-item (or (get site-times id)
......@@ -61,6 +71,7 @@
:time 0
:ts 0})
track? (and (not= 0 id)
(not (contains? ignore-set host))
(< 0 time))
new-item (assoc site-item :time (+ (:time site-item) time)
:icon (:favIconUrl tab)
......
......@@ -10,19 +10,21 @@
(defn save-raw
"Saves the data raw, without converting it to transit first."
[data callback]
(storage/set {:data data} storage/local callback))
[id data callback]
(storage/set {id data} storage/local callback))
(defn save
"Saves our data on the extension's storage after converting it to transit."
[data]
(save-raw (to-transit data) nil))
([id data]
(save id data nil))
([id data callback]
(save-raw id (to-transit data) callback)))
(defn load
"Returns a channel where we'll put the entire data block read from the
extension's storage"
[]
[id]
(go
(let [raw (:data (<! (storage/get)))]
(let [raw (id (<! (storage/get)))]
(from-transit raw))))
(ns relevance.settings)
(def default-settings
  "Settings used as a fallback when no settings could be loaded from storage.
  :ignore-set is a set of host names that are checked against the hostname
  of a URL to decide whether it should be ignored."
  {:ignore-set #{"localhost" "newtab" "t.co" "buff.ly"}})
\ No newline at end of file
......@@ -2,7 +2,8 @@
(:require [cljs.core.async :refer [<!]]
[clojure.string :refer [lower-case trim]]
[dommy.core :as dommy]
[cognitect.transit :as transit])
[cognitect.transit :as transit]
[clojure.string :as string])
(:require-macros [cljs.core.async.macros :refer [go go-loop]]))
......@@ -82,6 +83,15 @@
(hash-string shortened))
0))
(defn to-string-set
  "Splits the string s into a set of lower-cased, trimmed, non-empty strings.

  Items can be separated by commas, semi-colons or any whitespace (spaces,
  tabs, new lines, carriage returns); consecutive separators are treated as
  one. A nil s yields an empty set."
  [s]
  (->>
    ;; Splitting on any whitespace (not only \n and space) keeps tab-separated
    ;; or CRLF-terminated input from ending up as a single bogus entry.
    (string/split (or s "") #"[,;\s]+")
    (map string/trim)
    ;; A leading separator produces an empty first element
    (remove empty?)
    (map string/lower-case)
    (into #{})))
(defn time-display
"Returns a display string for a number of milliseconds"
......@@ -94,6 +104,5 @@
(< seconds 60) (str seconds "s")
(< seconds 3600) (time-label (quot seconds 60) "min" (rem seconds 60) "s")
(< seconds 86400) (time-label (quot seconds 3600) "h" (quot (rem seconds 3600) 60) "min")
:else (time-label (quot seconds 86400) "d" (quot (rem seconds 86400) 3600) "h"))
)
:else (time-label (quot seconds 86400) "d" (quot (rem seconds 86400) 3600) "h")))
)
\ No newline at end of file
......@@ -41,7 +41,7 @@
(defn do-transformations! []
(go
(let [data (<! (io/load))
(let [data (<! (io/load :data))
nodes (sel :.result_url_heading)
base (sel1 :.web_regular_results)]
(doseq [node nodes]
......
......@@ -4,13 +4,17 @@
[cljs.core.async :refer [>! <!]]
[cljs.core :refer [random-uuid]]
[cljsjs.react-bootstrap]
[clojure.string :as string]
[khroma.idle :as idle]
[khroma.log :as console]
[khroma.runtime :as runtime]
[khroma.storage :as storage]
[reagent.core :as reagent]
[re-frame.core :refer [dispatch register-sub register-handler subscribe dispatch-sync]]
[relevance.io :as io])
[relevance.io :as io]
[relevance.utils :as utils]
[relevance.settings :refer [default-settings]]
)
(:require-macros [cljs.core :refer [goog-define]]
[cljs.core.async.macros :refer [go go-loop]]
[reagent.ratom :refer [reaction]]))
......@@ -26,6 +30,7 @@
;; Application data, will be saved
(register-sub :data general-query)
(register-sub :settings general-query)
(register-sub :raw-data general-query)
;; Transient data items
(register-sub :ui-state general-query)
......@@ -46,11 +51,6 @@
[tabs]
(remove #(.startsWith (:url %) "chrome") tabs))
;; Tab items we actually care about
(def relevant-tab-items [:index :url :title :icon])
;;;;----------------------------
;;;; Handlers
;;;;----------------------------
......@@ -67,7 +67,7 @@
(fn [app-state [_ transit-data]]
;; We actually just need to save it, since ::storage-changed takes care
;; of loading it and importing it.
(io/save-raw transit-data #(runtime/send-message :reload-data))
(io/save-raw :data transit-data #(runtime/send-message :reload-data))
(-> app-state
(assoc-in [:ui-state :section] :url-times)
(assoc-in [:app-state :import] nil))
......@@ -77,9 +77,13 @@
(register-handler
::initialize
(fn [_]
;; Fake a ::storage-changed message to load the data from storage
(go (dispatch [::storage-changed {:changes {:data {:newValue (:data (<! (storage/get)))}}}]))
(go
(dispatch [:settings-set (or (<! (io/load :settings))
default-settings)])
;; Fake a ::storage-changed message to load the data from storage
(dispatch [::storage-changed {:changes {:data {:newValue (:data (<! (storage/get)))}}}]))
{:app-state {}
:settings default-settings
:ui-state {:section :intro}}))
......@@ -88,6 +92,23 @@
(fn [app-state [_ info]]
(assoc-in app-state [:ui-state :modal-info] info)))
(register-handler
  :settings-parse
  ;; Receives settings whose :ignore-set is still raw text, converts it into
  ;; a string set and dispatches :settings-set with the parsed settings and
  ;; the save flag set to true. The app state itself is returned untouched.
  (fn [app-state [_ raw-settings]]
    (let [parsed {:ignore-set (-> raw-settings :ignore-set utils/to-string-set)}]
      (dispatch [:settings-set parsed true]))
    app-state))
(register-handler
  :settings-set
  ;; Stores the settings received under :settings in the app state. When
  ;; save? is truthy the settings are also saved under the :settings id.
  (fn [app-state [_ settings save?]]
    ; (console/log "Saving" settings save?)
    (let [updated-state (assoc app-state :settings settings)]
      (when save?
        ;; Settings can have an effect on behavior, so once they are saved
        ;; we tell the backend to reload the data.
        (io/save :settings settings #(runtime/send-message :reload-data)))
      updated-state)))
(register-handler
::storage-changed
......@@ -116,11 +137,6 @@
;;;;----------------------------
(defn navbar-item [label section current]
[:li {:class (when (= section current) "active")}
[:a {:on-click #(dispatch [:app-state-item [:ui-state :section] section])} label
(when (= section current) [:span {:class "sr-only"} "(current)"])]])
(defn nav-left-item [label class section current]
[:li {:class (when (= section current) "active")}
[:a {:on-click #(dispatch [:app-state-item [:ui-state :section] section])}
......@@ -136,6 +152,7 @@
(nav-left-item "Introduction" "pe-7s-home" :intro @section)
(nav-left-item "Page times" "pe-7s-note2" :url-times @section)
(nav-left-item "Site times" "pe-7s-note2" :site-times @section)
(nav-left-item "Settings" "pe-7s-config" :settings @section)
(nav-left-item "Export data" "pe-7s-box1" :export @section)
(nav-left-item "Import data" "pe-7s-attention" :import @section)]))
)
......@@ -150,6 +167,7 @@
:url-times "Time reading a page"
:site-times "Time visiting a site"
:export "Export your Relevance data"
:settings "Settings"
:import "Import a Relevance backup"
"")
]])))
......@@ -200,7 +218,7 @@
(< age-ms (* 14 ms-day)) "#cc6600"
:else "#994c00"
)
]
]
^{:key i}
[:tr
[:td {:class "col-sm-2"}
......@@ -330,6 +348,7 @@
]
])))
(defn data-import []
(let [import-data (reagent/atom "")]
(fn []
......@@ -342,14 +361,39 @@
:rows 30
:value @import-data
:on-change #(reset! import-data (-> % .-target .-value))}]
[:a {:class "btn btn-danger btn-sm"
[:a {:class "btn btn-primary btn-sm"
:on-click #(dispatch [:data-import @import-data])} "Import"]
])))
(defn div-settings
  "Settings section: lets the user edit the list of ignored domains.

  Returns a render function (Reagent form-2 component). The text area is
  backed by a local atom that is seeded only once, when the component is
  created, with the current ignore set (sorted, one domain per line).
  Clicking the button dispatches :settings-parse with the raw text typed by
  the user under :ignore-set."
  []
  (let [ignore-set (subscribe [:settings :ignore-set])
        ;; Local editing buffer; nothing is saved until the button is clicked
        our-ignore (reagent/atom (string/join "\n" (sort @ignore-set)))]
    (fn []
      [:div {:class "col-sm-12"}
       [:div {:class "row"}
        [:div {:class "col-sm-6"}
         [:h3 "Ignore domains"]
         [:p "Type the domains that you want to ignore in the text box, one per line."]
         [:p {:class "alert alert-info"} [:strong "Heads up! "] "Adding a domain to the ignore list will remove the data Relevance currently has for it."]]
        [:div {:class "col-sm-6"}
         [:textarea {:class     "form-control"
                     :value     @our-ignore
                     :rows      10
                     :on-change #(reset! our-ignore (-> % .-target .-value))}]]]
       [:div {:class "row"}
        [:a {:class    "btn btn-danger btn-sm"
             :on-click #(dispatch [:settings-parse {:ignore-set @our-ignore}])} "Save settings"]]])))
;; Maps a section keyword (the same keywords the navigation items set as the
;; current section) to the component function for that section.
;; NOTE(review): presumably looked up with the value of [:ui-state :section]
;; when rendering; confirm against the caller.
(def component-dir {:export data-export
:import data-import
:intro div-intro
:settings div-settings
:url-times div-urltimes
:site-times div-sitetimes})
......
......@@ -185,6 +185,26 @@
result (data/track-url-time (:url-times test-db) tab 9 ts)]
(is (= result (:url-times test-db)))
))
(testing "Attempting to add time to an ignored URL causes no changes"
;; Repeating almost the exact same test as when we tracked the time for
;; Numergent, only passing it as an ignore domain now.
(let [tab {:url "http://numergent.com/"
:title "Numergent limited"
:favIconUrl "http://numergent.com/favicon.png"}
ts 1445964037799
with-ignore (data/track-url-time (:url-times test-db) tab 9 ts
:ignore-set #{"localhost" "somedomain.com" "numergent.com"})
no-ignore (data/track-url-time (:url-times test-db) tab 9 ts
:ignore-set #{"localhost" "somedomain.com"})
tab-key (utils/url-key "http://numergent.com/")
item (get with-ignore tab-key)]
(is with-ignore)
(is (nil? item))
(is (= with-ignore (:url-times test-db)) "URL times should not have been altered")
(is (not= with-ignore no-ignore) "Removing the domain from the ignore list should result on the element being added")
(is (= (count no-ignore) (inc (count with-ignore))) "Result without ignoring the element should have one more value")
)
)
)
......@@ -207,7 +227,11 @@
(:favIconUrl tab) (:icon item)
"numergent.com" (:host item)
ts (:ts item)
1234 (:time item)))
1234 (:time item))
;; Let's make sure we did not break anything while adding an ignore parameter
(is (= result (data/track-site-time {} tab 1234 ts :ignore-set #{"localhost" "newtab"}))
"The result should be the same even if we pass an ignore-set")
)
)
(testing "Add time to an existing database for an existing site"
(let [tab {:url "http://numergent.com/opensource/index.html"
......@@ -230,7 +254,12 @@
147 (:time item))
(doseq [other (dissoc result id)]
(is (= (val other) (get (:site-times test-db) (key other))) "Other items should have remained untouched")
)))
)
;; Let's make sure we did not break anything while adding an ignore parameter
(is (= result (data/track-site-time (:site-times test-db) tab 3 ts
:ignore-set #{"localhost" "newtab"}))
"The result should be the same even if we pass an ignore-set")
))
(testing "Add time to an existing database for a new site"
(let [tab {:url "https://twitter.com/ArgesRic"
:title "ArgesRic"
......@@ -252,7 +281,13 @@
9 (:time item))
(doseq [other (dissoc result id)]
(is (= (val other) (get (:site-times test-db) (key other))) "Other items should have remained untouched")
))
)
;; Then, let's make sure we did not break anything while adding an ignore parameter
(is (= result
(data/track-site-time (:site-times test-db) tab 9 ts
:ignore-set #{"localhost" "somedomain.com"})))
)
)
(testing "Add zero time should not result on any changes"
(let [tab {:url "https://twitter.com/ArgesRic"
......@@ -278,13 +313,35 @@
ts)]
(is result)
(is (= result (:site-times test-db)))))
(testing "Add time to an ignored site does not change the database"
;; Repeating almost the exact same test as when we tracked the time for
;; Numergent, only passing it as an ignore domain now.
(let [tab {:url "http://numergent.com/opensource/index.html"
:title "Further open source project details"
:favIconUrl "http://numergent.com/newfavicon.png"}
ts 1445964037900
with-ignore (data/track-site-time (:site-times test-db) tab 3 ts
:ignore-set #{"localhost" "somedomain.com" "numergent.com"})
no-ignore (data/track-site-time (:site-times test-db) tab 3 ts
:ignore-set #{"localhost" "somedomain.com"})
id (utils/host-key (utils/hostname "http://numergent.com/opensource/"))
item (get no-ignore id)]
(is with-ignore)
(is (= with-ignore (:site-times test-db)))
(is (not= with-ignore no-ignore))
;; Then let's verify the values on the one we actually added
(are [expected result] (= expected result)
(:favIconUrl tab) (:icon item)
"numergent.com" (:host item)
ts (:ts item)
147 (:time item))))
)
(deftest test-time-clean-up
(deftest test-clean-up-by-time
(testing "Clean up date and minimum time are respected"
(let [min-date 1446028215913
pruned (data/time-clean-up (:url-times test-db) min-date 30)]
pruned (data/clean-up-by-time (:url-times test-db) min-date 30)]
(is pruned)
(is (= 5 (count pruned)))
;; We removed the right elements
......@@ -295,7 +352,7 @@
))
(testing "Timestamp filtering is only on strictly greater than"
(let [min-date 1446114615912
pruned (data/time-clean-up (:url-times test-db) min-date 30)]
pruned (data/clean-up-by-time (:url-times test-db) min-date 30)]
(is pruned)
(is (= 3 (count pruned)))
;; getprismatic is still there
......@@ -306,7 +363,7 @@
))
(testing "Cut-off seconds are respected when filtering"
(let [min-date 1446114615912
pruned (data/time-clean-up (:url-times test-db) min-date 28)]
pruned (data/clean-up-by-time (:url-times test-db) min-date 28)]
(is pruned)
(is (= 4 (count pruned)))
;; getprismatic is still there
......@@ -314,13 +371,34 @@
;; ... and we didn't lose splunk
(is (get pruned (utils/url-key "http://splunk.com/"))))
(let [min-date 1446114615913
pruned (data/time-clean-up (:url-times test-db) min-date 50)]
pruned (data/clean-up-by-time (:url-times test-db) min-date 50)]
(is pruned)
(is (= 2 (count pruned)))
(is (= #{-327774960 -327358142}
(into #{} (keys pruned)))))
))
(deftest test-clean-up-ignored
  ;; Verifies that clean-up-ignored only drops the url-times entries whose
  ;; host is in the ignore set, and leaves everything else untouched.
  (let [url-times (:url-times test-db)]
    (is (= url-times
           (data/clean-up-ignored url-times #{}))
        "Passing an empty set should not change things")
    ;; contains? on nil is false, so a missing ignore set must behave like an
    ;; empty one (settings may not have been saved yet).
    (is (= url-times
           (data/clean-up-ignored url-times nil))
        "Passing a nil ignore set should not change things")
    (is (= url-times
           (data/clean-up-ignored url-times #{"localhost" "somedomain.com"}))
        "Passing a set of non-matching domains should not change things")
    ;; Test removing a domain
    (let [result (data/clean-up-ignored url-times #{"localhost" "numergent.com"})]
      (is (= result (dissoc url-times -327774960 -526558523))
          "We should have removed the numergent-associated urls")
      (is (= 5 (count result))))
    ;; Test removing multiple domains
    (let [result (data/clean-up-ignored url-times #{"localhost" "getprismatic.com" "numergent.com"})]
      (is (= result (dissoc url-times -327774960 -526558523 1609181525))
          "We should have removed the numergent and getprismatic associated urls")
      (is (= 4 (count result))))))
(deftest test-accumulate-site-times
(testing "Accumulate site times creates a total but doesn't add favicons"
(is (= (into {} (map #(vector (key %) (assoc (val %) :icon nil))
......@@ -362,7 +440,7 @@
(deftest test-accumulate-after-clean-up
(testing "We get a value accumulation per site time after clean up"
(let [min-date 1446114615912
pruned (data/time-clean-up (:url-times test-db) min-date 30)
pruned (data/clean-up-by-time (:url-times test-db) min-date 30)
site-times (data/accumulate-site-times pruned)]
(is pruned)
(is (= {971841386 {:icon nil
......
......@@ -87,6 +87,7 @@
"chrome://extensions/?id=okhigbflgnbihoiokilagelkalkcigfp" "extensions"
"chrome-extension://okhigbflgnbihoiokilagelkalkcigfp/index.html" "okhigbflgnbihoiokilagelkalkcigfp"
"file:///Users/ricardo/Sources/user.html" ""
"view-source:http://localhost:4000/opensource/" ""
"" ""
nil nil
))
......@@ -115,7 +116,21 @@
"http://localhost" true
"https://numergent.com/" true
"chrome://extensions/?id=okhigbflgnbihoiokilagelkalkcigfp" false
"view-source:http://localhost:4000/opensource/" false
"http" false
"" false
nil false
))
\ No newline at end of file
))
(deftest test-to-string-set
  ;; Each row is an input string followed by the set we expect to get back.
  (are [s result] (= result (utils/to-string-set s))
    "alpha" #{"alpha"}
    "alpha\nbeta" #{"alpha" "beta"}
    "ALPHA\nBETA" #{"alpha" "beta"}
    "alpha\nbeta " #{"alpha" "beta"}
    "a\nbeta,c" #{"a" "beta" "c"}
    "a\nbeta,C;d;" #{"a" "beta" "c" "d"}
    "a b,c" #{"a" "b" "c"}
    "a,b,,c,;d;e;; f,e\n\n" #{"a" "b" "c" "d" "e" "f"}
    ;; Degenerate inputs: nothing to split should yield an empty set
    nil #{}
    "" #{}
    " ,;\n" #{}
    ))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment