Commit 855278ea authored by Ricardo J. Mendez

v3 data migration, not tracking sub-second times

- Precision is now kept in seconds, not milliseconds
- Renamed :timestamp to :ts
- Renamed :favIconURL to :icon
- Removed any page where we haven't spent at least one second
parent a8357d98
......@@ -26,7 +26,7 @@
(def window-alarm "window-alarm")
(def non-http-penalty 0.05)
(def relevant-tab-keys [:windowId :id :active :url :start-time :title :favIconUrl])
(def relevant-tab-keys [:windowId :id :active :url :start-time :title :icon])
(def select-tab-keys #(select-keys % relevant-tab-keys))
(defn now [] (.now js/Date))
......@@ -328,8 +328,8 @@
:track-time
(fn [app-state [_ tab time]]
(let [data (:data app-state)
url-times (data/track-url-time (or (:url-times data) {}) tab time (now))
site-times (data/track-site-time (or (:site-times data) {}) tab time (now))
url-times (data/track-url-time (or (:url-times data) {}) tab (quot time 1000) (now))
site-times (data/track-site-time (or (:site-times data) {}) tab (quot time 1000) (now))
new-data (assoc data :url-times url-times :site-times site-times)]
; (console/trace time " milliseconds spent at " tab)
(io/save new-data)
......
......@@ -10,7 +10,7 @@
(into {} (map #(vector (host-key (key %))
(hash-map :host (key %)
:time (apply + (map :time (val %)))
:favIconUrl (:favIconUrl (first (val %))))
:icon (:icon (first (val %))))
)))
))
......@@ -19,10 +19,9 @@
"Removes from url-times all the items that are older than cut-off-ts
and which were viewed for less than min-seconds"
[url-times cut-off-ts min-seconds]
(into {} (remove #(and (< (:timestamp (val %))
(into {} (remove #(and (< (:ts (val %))
cut-off-ts)
(< (:time (val %))
(* min-seconds 1000)))
(< (:time (val %)) min-seconds))
url-times)))
(defn track-url-time
......@@ -33,13 +32,14 @@
(let [url (or (:url tab) "")
id (url-key url)
url-item (or (get url-times id)
{:url url
:time 0
:timestamp 0})
track? (not= 0 id)
{:url url
:time 0
:ts 0})
track? (and (not= 0 id)
(< 0 time))
new-item (assoc url-item :time (+ (:time url-item) time)
:title (:title tab)
:timestamp timestamp)]
:ts timestamp)]
(if track?
(assoc url-times id new-item)
url-times)))
......@@ -54,13 +54,14 @@
(let [host (hostname (or (:url tab) ""))
id (host-key host)
site-item (or (get site-times id)
{:host host
:time 0
:timestamp 0})
track? (not= 0 id)
{:host host
:time 0
:ts 0})
track? (and (not= 0 id)
(< 0 time))
new-item (assoc site-item :time (+ (:time site-item) time)
:favIconUrl (:favIconUrl tab)
:timestamp timestamp)]
:icon (:icon tab)
:ts timestamp)]
(if track?
(assoc site-times id new-item)
site-times)))
\ No newline at end of file
(ns relevance.migrations
(:require
[clojure.set :refer [rename-keys]]
[relevance.data :refer [accumulate-site-times]]
[relevance.utils :refer [url-key host-key hostname]]))
......@@ -15,13 +16,26 @@
(.-uuid (random-uuid))))
(assoc :data-version 1)
(assoc :url-times (into {} (map #(vector (key %)
(dissoc (val %) :favIconUrl))
(dissoc (val %) :favIconUrl :icon))
(:url-times data))))
(assoc :site-times (accumulate-site-times (:url-times data))))
1 (->
data
(assoc :data-version 2)
(assoc :site-times (accumulate-site-times (:url-times data))))
2 (let [url-times (into {}
(->>
(:url-times data)
(map #(vector (key %)
(-> (val %)
(assoc :time (quot (:time (val %)) 1000))
(rename-keys {:timestamp :ts}))))
(remove #(= 0 (:time (second %))))))]
(assoc data
:data-version 3
:url-times url-times
:site-times (accumulate-site-times url-times))
)
data
))
......
......@@ -58,8 +58,8 @@
the result."
[url]
(if (not-empty url)
(let [element (-> (dommy/create-element :a)
(dommy/set-attr! :href url))
(let [element (-> (dommy/create-element :a)
(dommy/set-attr! :href url))
shortened (str (.toLowerCase (.-host element)) (.-pathname element) (.-search element))]
(hash-string shortened))
0))
......@@ -67,13 +67,11 @@
(defn time-display
"Returns a display string for a number of seconds"
[millis]
(let [seconds (quot millis 1000)]
(cond
(< seconds 1) "< 1s"
(< seconds 60) (str seconds "s")
(< seconds 3600) (str (quot seconds 60) "min " (rem seconds 60) "s")
(< seconds 86400) (str (quot seconds 3600) "h " (quot (rem seconds 3600) 60) "min")
;; TODO: 86592666 is returning "1d 0h", we should elide the lowest if it's 0
:else (str (quot seconds 86400) "d " (quot (rem seconds 86400) 3600) "h"))
))
\ No newline at end of file
[seconds]
(cond
(< seconds 1) "< 1s"
(< seconds 60) (str seconds "s")
(< seconds 3600) (str (quot seconds 60) "min " (rem seconds 60) "s")
(< seconds 86400) (str (quot seconds 3600) "h " (quot (rem seconds 3600) 60) "min")
;; TODO: 86592 is returning "1d 0h", we should elide the lowest unit if it's 0
:else (str (quot seconds 86400) "d " (quot (rem seconds 86400) 3600) "h")))
\ No newline at end of file
......@@ -47,7 +47,7 @@
;; Tab items we actually care about
(def relevant-tab-items [:index :url :title :favIconUrl])
(def relevant-tab-items [:index :url :title :icon])
;;;;----------------------------
......@@ -180,7 +180,7 @@
(map-indexed
(fn [i tab]
(let [url (:url tab)
favicon (:favIconUrl (get site-data (host-key (hostname url))))
favicon (:icon (get site-data (host-key (hostname url))))
title (:title tab)
label (if (empty? title)
url
......@@ -238,13 +238,13 @@
@to-list
(map-indexed
(fn [i site]
(let [url (:host site)
favicon (:favIconUrl site)]
(let [url (:host site)
icon (:icon site)]
^{:key i}
[:tr
[:td {:class "col-sm-1"} (time-display (:time site))]
[:td {:class "col-sm-6"} (if favicon
[:img {:src favicon
[:td {:class "col-sm-6"} (if icon
[:img {:src icon
:width 16
:height 16}])
url]
......
This diff is collapsed.
(ns relevance.test.migrations
(:require [cljs.test :refer-macros [deftest testing is are]]
[clojure.set :refer [rename-keys]]
[relevance.migrations :as migrations]
[relevance.utils :as utils]
))
......@@ -8,35 +9,41 @@
{:instance-id "67b5c8eb-ae97-42ad-b6bc-803ac7e31221"
:suspend-info nil
:url-times {1274579744
{:url "https://developer.chrome.com/extensions/examples/api/contextMenus/basic/sample.js",
:time 117300,
:timestamp 1445964037798,
:title "https://developer.chrome.com/extensions/examples/api/contextMenus/basic/sample.js",
:favIconUrl "https://developer.chrome.com/favicon.ico"}
{:url "https://developer.chrome.com/extensions/examples/api/contextMenus/basic/sample.js",
:time 117300,
:timestamp 1445964037798,
:title "https://developer.chrome.com/extensions/examples/api/contextMenus/basic/sample.js",
:icon "https://developer.chrome.com/favicon.ico"}
-1400165536
{:url "https://www.polygon.com/2015/10/27/9623950/gravity-rush-2-ps4-north-america",
:time 14711,
:timestamp 1446036279627,
:title "Gravity Rush 2 confirmed for North America | Polygon",
:favIconUrl "https://cdn2.vox-cdn.com/community_logos/42931/favicon.ico"}
{:url "https://www.polygon.com/2015/10/27/9623950/gravity-rush-2-ps4-north-america",
:time 14711,
:timestamp 1446036279627,
:title "Gravity Rush 2 confirmed for North America | Polygon",
:icon "https://cdn2.vox-cdn.com/community_logos/42931/favicon.ico"}
-2272190
{:url "http://lanyrd.com/conferences/",
:time 5617,
:timestamp 1446047687895,
:title "Conferences and events worldwide | Lanyrd",
:favIconUrl nil}
{:url "http://lanyrd.com/conferences/",
:time 5617,
:timestamp 1446047687895,
:title "Conferences and events worldwide | Lanyrd",
:icon nil}
-327358142
{:url "https://developer.chrome.com/extensions/contextMenus",
:time 901682,
:timestamp 1446028215734,
:title "chrome.contextMenus - Google Chrome",
:favIconUrl "https://www.google.com/images/icons/product/chrome-32.png"}
{:url "https://developer.chrome.com/extensions/contextMenus",
:time 901682,
:timestamp 1446028215734,
:title "chrome.contextMenus - Google Chrome",
:icon "https://www.google.com/images/icons/product/chrome-32.png"}
1917381154
{:url "http://www.kitco.com/market/",
:time 4432,
:timestamp 1446051494575,
:title "New York spot price Gold...",
:favIconUrl nil
{:url "http://www.kitco.com/market/",
:time 4432,
:timestamp 1446051494575,
:title "New York spot price Gold...",
:icon nil
}
1038158073
{:url "http://splunk.com/"
:time 290
:ts 1446028215912
:title "Splunk"
}
}})
......@@ -56,7 +63,8 @@
))
(testing "v1 migration"
(let [v1 (migrations/migrate base-data)
v2 (migrations/migrate v1)]
v2 (migrations/migrate v1)
v3 (migrations/migrate v2)]
(is (not= v1 base-data))
(is (= 5 (count v1)) "We should have received five keys")
(are [k] (some? (k v1)) :url-times :instance-id :data-version :site-times)
......@@ -64,12 +72,13 @@
(is (= 1 (:data-version v1)) "Data should have been tagged with the version")
(doseq [[k v] (:url-times v1)]
(is (integer? k))
(is (nil? (:favIconUrl v)))
(is (nil? (:icon v)))
)
(is (= (:site-times v1) {-331299663 {:host "developer.chrome.com" :time 1018982 :favIconUrl "https://developer.chrome.com/favicon.ico"}
-967938826 {:host "www.polygon.com" :time 14711 :favIconUrl "https://cdn2.vox-cdn.com/community_logos/42931/favicon.ico"}
-1466097211 {:host "lanyrd.com" :time 5617 :favIconUrl nil}
-915908674 {:host "www.kitco.com" :time 4432 :favIconUrl nil}})
(is (= (:site-times v1) {-331299663 {:host "developer.chrome.com" :time 1018982 :icon "https://developer.chrome.com/favicon.ico"}
-967938826 {:host "www.polygon.com" :time 14711 :icon "https://cdn2.vox-cdn.com/community_logos/42931/favicon.ico"}
-1466097211 {:host "lanyrd.com" :time 5617 :icon nil}
-915908674 {:host "www.kitco.com" :time 4432 :icon nil}
1557509622 {:host "splunk.com" :time 290 :icon nil}})
"Site data should have been aggregated")
;; Test v2 migration
;; Moving from v1 to v2 retains all data, but loses the favIconUrl for the sites, since
......@@ -77,12 +86,35 @@
(is (= v2 (-> v1
(assoc :data-version 2)
(assoc :site-times (into {} (map #(vector (key %)
(assoc (val %):favIconUrl nil))
(assoc (val %) :icon nil))
(:site-times v1))))
)))
;; Test v3 migration
(is (= 3 (:data-version v3)))
(is (= (count (:url-times v3))
(dec (count (:url-times v2)))))
(is (nil? (get (:url-times v3) 1038158073)) "We shouldn't have Splunk's page anymore")
(doseq [[k v] (:url-times v3)
:let [in-v2 (get (:url-times v2) k)]]
;; Every item is its equivalent of the v2 item, but the precision is seconds
(is (= v
(-> in-v2
(assoc :time (quot (:time in-v2) 1000))
(rename-keys {:timestamp :ts}))
)))
(is (= (count (:site-times v3))
(dec (count (:site-times v2)))))
(is (nil? (get (:site-times v3) 1557509622)) "We shouldn't have Splunk's host anymore, since we have spent less than 1 second there")
(doseq [[k v] (:site-times v3)
:let [in-v2 (get (:site-times v2) k)]]
;; Every item is its equivalent of the v2 item, but the precision is seconds
(is (= v (-> in-v2
(assoc :time (quot (:time in-v2) 1000))
(rename-keys {:icon :icon}))
)))
;; Test recurrent migration
(is (= v2 (migrations/migrate-to-latest base-data)) "Migrating all the way to the latest should yield the same v2 data")
(is (= v2 (migrations/migrate-to-latest v1)) "Migrating all the way to the latest should yield the same v2 data")
(is (= v3 (migrations/migrate-to-latest base-data)) "Migrating all the way to the latest should yield the same v2 data")
(is (= v3 (migrations/migrate-to-latest v1)) "Migrating all the way to the latest should yield the same v2 data")
(is (not= base-data (migrations/migrate-to-latest base-data)) "Migration loop should have returned a different data set")
))
)
......
......@@ -42,19 +42,17 @@
(deftest test-time-display
(are [time label] (= (utils/time-display time) label)
500 "< 1s"
999 "< 1s"
1000 "1s"
1001 "1s"
1999 "1s"
3742 "3s"
49231 "49s"
124076 "2min 4s"
762661 "12min 42s"
8659266 "2h 24min"
86592666 "1d 0h"
124076042 "1d 10h"
248996042 "2d 21h"
0 "< 1s"
0.999 "< 1s"
1 "1s"
3 "3s"
49 "49s"
124 "2min 4s"
762 "12min 42s"
8659 "2h 24min"
86592 "1d 0h"
124076 "1d 10h"
248996 "2d 21h"
))
(deftest test-host
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment