diff --git a/src/http_front/crawler_http.erl b/src/http_front/crawler_http.erl
index 6b1f780..894afef 100644
--- a/src/http_front/crawler_http.erl
+++ b/src/http_front/crawler_http.erl
@@ -47,6 +47,7 @@ srv_name() ->
 init([DBHost, DBPort, Port]) ->
 	process_flag(trap_exit, true),
 	db_frontend:start(DBHost, DBPort, 2),
+	http_cache:start_link(),
 	{ok, Pid} = inets:start(httpd, [
 	{modules, [mod_alias, mod_auth, mod_esi, mod_actions, mod_cgi, mod_dir, mod_get,
 	mod_head, mod_log, mod_disk_log]},
diff --git a/src/http_front/http_cache.erl b/src/http_front/http_cache.erl
new file mode 100644
index 0000000..8eb6f2a
--- /dev/null
+++ b/src/http_front/http_cache.erl
@@ -0,0 +1,135 @@
+%%
+%% http_cache.erl
+%% Kevin Lynx
+%% 07.03.2013
+%%
+%% Cache in front of db_frontend, used by the HTTP handlers.
+%%
+-module(http_cache).
+-behaviour(gen_server).
+-export([init/1,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2,
+    terminate/2,
+    code_change/3]).
+-export([start_link/0,
+    stop/0,
+    search/1,
+    today_top/0]).
+-record(state, {cache}).
+-define(OUT_OF_DATE, 5*60*1000). % entry lifetime, in milliseconds
+-define(CACHE_SIZE, 1000). % number of entries that triggers a shrink
+
+%% Start the cache server and register it locally under srv_name().
+start_link() ->
+    gen_server:start_link({local, srv_name()}, ?MODULE, [], []).
+
+%% Ask the server to stop; the request is a cast and returns at once.
+stop() ->
+    gen_server:cast(srv_name(), stop).
+
+%% Result of the `{search, Key}' query, answered through the cache.
+search(Key) ->
+    gen_server:call(srv_name(), {query, {search, Key}}).
+
+%% Result of the `top' query, answered through the cache.
+today_top() ->
+    gen_server:call(srv_name(), {query, top}).
+
+%% Queue a refresh of the `top' entry without waiting for the result.
+async_top() ->
+    gen_server:cast(srv_name(), {update, top}).
+
+%% Start with an empty cache and queue the first `top' fetch.
+init([]) ->
+    async_top(),
+    {ok, #state{cache = gb_trees:empty()}}.
+
+%% Name the server is registered under.
+srv_name() ->
+    http_cache.
+
+%% Nothing to release on shutdown; the return value is ignored by gen_server.
+terminate(_Reason, _State) ->
+    ok.
+
+%% The callback receives (OldVsn, State, Extra). The state is kept as is;
+%% binding the third argument instead would return Extra as the new state.
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%% Casts: shrink the cache, refresh one entry, or stop the server.
+handle_cast(decrease_cache, #state{cache = Cache} = State) ->
+    NewCache = remove_oldest(Cache),
+    async_top(), % make sure `top' exists
+    {noreply, State#state{cache = NewCache}};
+%% The freshly fetched value is only stored; no caller is waiting for it.
+handle_cast({update, Type}, State) ->
+    {NewState, _Ret} = update(Type, State),
+    {noreply, NewState};
+handle_cast(stop, State) ->
+    {stop, normal, State}.
+
+%% Answer `{query, Type}' through the cache; see query/2.
+handle_call({query, Type}, _From, State) ->
+    {NewState, Ret} = query(Type, State),
+    {reply, Ret, NewState};
+handle_call(_Request, _From, State) ->
+    %% Reply explicitly, otherwise the caller blocks until its call timeout.
+    {reply, {error, unknown_call}, State}.
+
+%% Every plain message sent to the server is ignored.
+handle_info(_Msg, State) ->
+    {noreply, State}.
+
+%% Return {NewState, Result}: fetch on a miss, else use the cached value.
+query(Type, #state{cache = Cache} = State) ->
+    case gb_trees:is_defined(Type, Cache) of
+        true -> do_query(Type, State);
+        false -> update(Type, State)
+    end.
+
+%% Fetch Type via do_update/1, store it as {FetchTime, Result} and queue a
+%% shrink once the cache holds ?CACHE_SIZE entries or more.
+update(Type, #state{cache = Cache} = State) ->
+    Ret = do_update(Type),
+    Val = {os:timestamp(), Ret}, % same tuple format as the deprecated now/0
+    io:format("update cache ~p~n", [Type]),
+    NewCache = gb_trees:enter(Type, Val, Cache),
+    case gb_trees:size(NewCache) >= ?CACHE_SIZE of
+        true -> gen_server:cast(self(), decrease_cache);
+        false -> ok
+    end,
+    {State#state{cache = NewCache}, Ret}.
+
+%% Run the db_frontend query that belongs to a cache key.
+do_update({search, Key}) ->
+    db_frontend:search(Key);
+do_update(top) ->
+    db_frontend:today_top().
+
+%% Return the cached value for Type. When it is out of date a refresh is
+%% queued, but the stale value is still the one returned to this caller.
+do_query(Type, #state{cache = Cache} = State) ->
+    {Start, Ret} = gb_trees:get(Type, Cache),
+    case is_outofdate(Start) of
+        true -> gen_server:cast(self(), {update, Type});
+        false -> ok
+    end,
+    {State, Ret}.
+
+%% True when Time lies more than ?OUT_OF_DATE milliseconds in the past.
+is_outofdate(Time) ->
+    (timer:now_diff(os:timestamp(), Time) div 1000) > ?OUT_OF_DATE.
+
+%% Drop the ?CACHE_SIZE div 2 oldest entries; rebuild a tree from the rest.
+remove_oldest(Cache) ->
+    io:format("decrease cache ~p to ~p~n", [?CACHE_SIZE, ?CACHE_SIZE div 2]),
+    Sorted = lists:sort(fun compare_keyval/2, gb_trees:to_list(Cache)),
+    {_Oldest, Rest} = lists:split(?CACHE_SIZE div 2, Sorted),
+    gb_trees:from_orddict(lists:keysort(1, Rest)).
+
+%% Order {Key, {FetchTime, Result}} pairs by fetch time, oldest first.
+compare_keyval({_, {T1, _}}, {_, {T2, _}}) ->
+    T1 =< T2.
diff --git a/src/http_front/http_handler.erl b/src/http_front/http_handler.erl
index efc7a91..9f24b88 100644
--- a/src/http_front/http_handler.erl
+++ b/src/http_front/http_handler.erl
@@ -33,7 +33,7 @@ top(SessionID, _Env, _Input) ->
 	mod_esi:deliver(SessionID, [?CONTENT_TYPE, Response]).
today_top(SessionID, _Env, _Input) -> - Rets = db_frontend:today_top(), + Rets = http_cache:today_top(), BodyList = format_search_result(Rets), Body = ?TEXT("