Commit 43eee333 authored by Mikko Ahlroth

Sync users progressively throughout the day instead of all in one go

This should lower the load on the server considerably
parent aa116d46
Pipeline #39499500 passed with stages
in 7 minutes and 29 seconds
......@@ -53,6 +53,14 @@ nano config/dev.secret.exs # Set up dev config with at least the line "use Mix.
* `mix frontend.clean`: Clean frontend output and build artifacts
* `MINIFY=true mix frontend.build`: Build frontend with minification.
### Env vars for development
```
RUN_CACHES=true # If set to any value, will generate user caches even in dev mode, otherwise user
# caches will not be generated
MINIFY=true # If set to true, generated frontend assets will be minified.
```
## Production
Generate [Distillery](https://hex.pm/packages/distillery) release:
......
......@@ -20,10 +20,10 @@ defmodule CodeStats do
worker(CodeStats.User.Terminator, [])
]
# Start XPCacheRefresher if in prod
# Start XPCacheRefresher if in prod or if told to
children =
case CodeStats.Utils.get_conf(:compile_env) do
:dev -> children
case {CodeStats.Utils.get_conf(:compile_env), System.get_env("RUN_CACHES")} do
{:dev, nil} -> children
_ -> children ++ [worker(CodeStats.XP.XPCacheRefresher, [])]
end
......
......@@ -134,21 +134,15 @@ defmodule CodeStats.User do
@doc """
Calculate and store cached XP values for user.
If `update_all` is set, all XP is gathered and the whole cache is replaced, not
just added to. This results in a total recalculation of all the user's XP.
If `since` is a datetime, XP is gathered since the given datetime. If the value is `:all`, then
all of the user's XP is processed. If it is `nil` (the default), XP is gathered since the user's
last cache time.
"""
def update_cached_xps(user, update_all \\ false) do
@spec update_cached_xps(%__MODULE__{}, nil | :all | DateTime.t()) :: map
def update_cached_xps(user, since \\ nil) do
update_start_time = DateTime.utc_now()
last_cached =
if not update_all and user.last_cached != nil do
user.last_cached
else
DateTime.from_naive!(~N[1970-01-01T00:00:00], "Etc/UTC")
end
# If update_all is given or user cache is empty, don't use any previous cache data
cached_data = %{
empty_cache = %{
languages: %{},
machines: %{},
dates: %{},
......@@ -159,11 +153,18 @@ defmodule CodeStats.User do
total_caching_duration: 0
}
cached_data =
case {update_all, user.cache} do
{true, _} -> cached_data
{_, nil} -> cached_data
_ -> unformat_cache_from_db(user.cache)
all_since = DateTime.from_naive!(~N[1970-01-01T00:00:00], "Etc/UTC")
{xp_since, cached_data} =
cond do
match?(%DateTime{}, since) ->
{since, empty_cache}
since == :all or is_nil(user.last_cached) ->
{all_since, unformat_cache_from_db(user.cache)}
true ->
{user.last_cached, unformat_cache_from_db(user.cache)}
end
# Load all of user's new XP plus required associations
......@@ -172,7 +173,7 @@ defmodule CodeStats.User do
x in XP,
join: p in Pulse,
on: p.id == x.pulse_id,
where: p.user_id == ^user.id and p.inserted_at >= ^last_cached,
where: p.user_id == ^user.id and p.inserted_at >= ^xp_since,
select: {p, x}
)
......@@ -195,7 +196,7 @@ defmodule CodeStats.User do
}
# Correct key for storing caching duration
duration_key = if update_all, do: :total_caching_duration, else: :caching_duration
duration_key = if since == :all, do: :total_caching_duration, else: :caching_duration
# Store cache that is formatted for DB and add caching duration
stored_cache =
......@@ -208,7 +209,7 @@ defmodule CodeStats.User do
# Persist cache changes and update user's last cached timestamp
user
|> cast(%{cache: stored_cache}, [:cache])
|> put_change(:last_cached, DateTime.utc_now())
|> put_change(:last_cached, DateTime.utc_now() |> DateTime.truncate(:second))
|> Repo.update!()
# Return the cache data for the caller
......
......@@ -9,8 +9,17 @@ defmodule CodeStats.XP.XPCacheRefresher do
alias CodeStats.{Repo, User}
# Run every 24 hours
@how_often 24 * 60 * 60 * 1000
# Run about every two minutes (120 seconds after the last run)
@how_often 2 * 60 * 1000
# How many seconds back to fetch data for the "last 24h sync"
@sync_24h_secs 24 * 60 * 60
# How many users to sync for the "last 24h sync"
@sync_24h_count 50
# How many users to sync totally
@sync_total_count 1
def start_link do
GenServer.start_link(__MODULE__, %{})
......@@ -33,12 +42,40 @@ defmodule CodeStats.XP.XPCacheRefresher do
end
@doc """
Refresh XP caches of all users in the system.
Refresh XP caches of users in the system.
Will pick a bunch of the users that have not been synced in the last 24 hours (least recently
synced first), and sync their data from the last 24 hours. For this sync, if the user does not
have any pulses added after the last cache time, they are not synced.
After that, pick a smaller list of users (least recently synced first) and sync them totally.
"""
@spec do_refresh() :: :ok
def do_refresh() do
from(u in User, select: u)
sync_24h()
sync_total()
end
# Incrementally refresh the caches of stale but recently active users.
#
# Selects at most @sync_24h_count users whose `last_cached` is older than the cutoff
# (@sync_24h_secs seconds ago) and who have at least one pulse inserted after that cutoff,
# least recently cached first. Users whose `last_cached` is NULL never satisfy the `<`
# comparison in SQL, so they are left for the total sync.
defp sync_24h() do
  now = DateTime.utc_now()
  cutoff = Calendar.DateTime.subtract!(now, @sync_24h_secs)

  from(u in User,
    # The inner join combined with the inserted_at filter already guarantees that every
    # grouped user has at least one matching pulse, so no extra `having` clause is needed.
    join: p in User.Pulse,
    on: p.user_id == u.id,
    where: u.last_cached < ^cutoff and p.inserted_at > ^cutoff,
    group_by: u.id,
    order_by: [asc: u.last_cached],
    limit: @sync_24h_count
  )
  |> Repo.all()
  # Do not pass the cutoff as `since`: a DateTime makes update_cached_xps/2 start from an
  # empty cache, so the stored cache would hold only the last 24 hours of XP. Calling it
  # with the default (nil) adds the XP gathered since the user's own last cache time on top
  # of the existing cache, which is the safe incremental update.
  |> Enum.each(&User.update_cached_xps(&1))
end
defp sync_total() do
from(u in User, order_by: [asc: u.last_cached], limit: @sync_total_count)
|> Repo.all()
|> Enum.each(fn user -> User.update_cached_xps(user, true) end)
|> Enum.each(&User.update_cached_xps(&1, :all))
end
end
  • I'm not quite sure if the caches will be correct when you recalculate them for "last 24h" every 2 minutes for a single user.

    The full cache rebuilds will happen for one user every 2 minutes, i.e. 720 users a day. That sounds like pretty much enough to me.

  • It should check that the user cache is older than 24 hrs when it's updated. But obviously then it should use the last cache time and not 24 hrs as limit.

    And now that I think of it, what is the point of doing it at all if the cache is updated when the profile is viewed and when the full cache run hits every few days anyway? Damn what was I thinking writing this?

    I will improve this next week and get rid of the 24 hrs part as unnecessary.

  • Yeah, I'm pretty sure e.g. the hour data is like

    12-13 30
    13-14 23
    ...

    and when you have a pulse with, say, 2xp today at 13:34 it's going to be added every time the last 24 hour job runs. Which can be every 2 minutes if you're active and few other people are.

    I think the only safe ways to work with the cache are to add values after the previous cache generation or to do a full rebuild.

  • Right. Disregard my last message. :)

Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment