From 720a680743dfed492fcca0a11840a6ff985d61ea Mon Sep 17 00:00:00 2001
From: Kevin Lynx
Date: Wed, 3 Jul 2013 17:20:07 +0800
Subject: [PATCH] add http cache

---
 src/http_front/crawler_http.erl |   1 +
 src/http_front/http_cache.erl   | 170 ++++++++++++++++++++++++++++++++
 src/http_front/http_handler.erl |   4 +-
 3 files changed, 173 insertions(+), 2 deletions(-)
 create mode 100644 src/http_front/http_cache.erl

diff --git a/src/http_front/crawler_http.erl b/src/http_front/crawler_http.erl
index 6b1f780..894afef 100644
--- a/src/http_front/crawler_http.erl
+++ b/src/http_front/crawler_http.erl
@@ -47,6 +47,7 @@ srv_name() ->
 init([DBHost, DBPort, Port]) ->
     process_flag(trap_exit, true),
     db_frontend:start(DBHost, DBPort, 2),
+    http_cache:start_link(),
     {ok, Pid} = inets:start(httpd, [
     {modules, [mod_alias, mod_auth, mod_esi, mod_actions,
         mod_cgi, mod_dir, mod_get, mod_head, mod_log, mod_disk_log]},
diff --git a/src/http_front/http_cache.erl b/src/http_front/http_cache.erl
new file mode 100644
index 0000000..8eb6f2a
--- /dev/null
+++ b/src/http_front/http_cache.erl
@@ -0,0 +1,170 @@
+%%
+%% http_cache.erl
+%% Kevin Lynx
+%% 07.03.2013
+%%
+%% gen_server that caches db_frontend query results. Entries are kept in a
+%% gb_tree keyed by query type (`top' or {search, Key}); each value is
+%% {FetchedAt, Result}. A stale entry is still served while a refresh is
+%% queued, and the older half of the cache is evicted once it reaches
+%% ?CACHE_SIZE entries.
+%%
+-module(http_cache).
+-behaviour(gen_server).
+-export([init/1,
+         handle_call/3,
+         handle_cast/2,
+         handle_info/2,
+         terminate/2,
+         code_change/3]).
+-export([start_link/0,
+         stop/0,
+         search/1,
+         today_top/0]).
+-record(state, {cache}).
+%% Age, in milliseconds, after which a cached entry is refreshed.
+-define(OUT_OF_DATE, 5*60*1000).
+%% Number of entries at which the older half of the cache is evicted.
+-define(CACHE_SIZE, 1000).
+
+%% Start the cache server, registered locally under srv_name().
+start_link() ->
+    gen_server:start_link({local, srv_name()}, ?MODULE, [], []).
+
+%% Ask the server to stop; returns without waiting for it.
+stop() ->
+    gen_server:cast(srv_name(), stop).
+
+%% Cached equivalent of db_frontend:search(Key).
+search(Key) ->
+    gen_server:call(srv_name(), {query, {search, Key}}).
+
+%% Cached equivalent of db_frontend:today_top().
+today_top() ->
+    gen_server:call(srv_name(), {query, top}).
+
+%% Queue a refresh of the `top' entry without waiting for it.
+async_top() ->
+    gen_server:cast(srv_name(), {update, top}).
+
+%% Start with an empty cache and queue the first `top' fetch.
+init([]) ->
+    async_top(),
+    {ok, #state{cache = gb_trees:empty()}}.
+
+srv_name() ->
+    http_cache.
+
+terminate(_, _State) ->
+    ok.
+
+code_change(_, _, State) ->
+    {ok, State}.
+
+%% Evict the older half once the cache is full. Several decrease_cache
+%% messages can be queued before the first one is handled, so the size is
+%% re-checked here and surplus requests are ignored.
+handle_cast(decrease_cache, #state{cache = Cache} = State) ->
+    case gb_trees:size(Cache) >= ?CACHE_SIZE of
+        true ->
+            NewCache = remove_oldest(Cache),
+            async_top(), % make sure `top' exists
+            {noreply, State#state{cache = NewCache}};
+        false ->
+            {noreply, State}
+    end;
+
+%% Refresh one entry. Every query that hits a stale entry queues one of
+%% these, so skip the fetch when an earlier request already refreshed it.
+handle_cast({update, Type}, #state{cache = Cache} = State) ->
+    case is_fresh(Type, Cache) of
+        true ->
+            {noreply, State};
+        false ->
+            {NewState, _} = update(Type, State),
+            {noreply, NewState}
+    end;
+
+handle_cast(stop, State) ->
+    {stop, normal, State}.
+
+handle_call({query, Type}, _From, State) ->
+    {NewState, Ret} = query(Type, State),
+    {reply, Ret, NewState};
+
+%% Always answer: returning noreply here would leave the caller blocked
+%% until its gen_server:call timeout fires.
+handle_call(_, _From, State) ->
+    {reply, {error, unknown_call}, State}.
+
+handle_info(_, State) ->
+    {noreply, State}.
+
+%% Return {NewState, Result} for Type: fetch on a miss, otherwise serve the
+%% cached result and queue a refresh when it is out of date.
+query(Type, #state{cache = Cache} = State) ->
+    case gb_trees:lookup(Type, Cache) of
+        none ->
+            update(Type, State);
+        {value, {Start, Ret}} ->
+            refresh_if_outofdate(Type, Start),
+            {State, Ret}
+    end.
+
+%% Queue an {update, Type} cast to this server when Start is too old.
+refresh_if_outofdate(Type, Start) ->
+    case is_outofdate(Start) of
+        true ->
+            gen_server:cast(self(), {update, Type});
+        false ->
+            ok
+    end.
+
+%% Fetch Type from db_frontend, store it with the current time, and request
+%% an eviction pass when the cache has reached ?CACHE_SIZE entries.
+update(Type, #state{cache = Cache} = State) ->
+    Ret = do_update(Type),
+    Val = {os:timestamp(), Ret},
+    io:format("update cache ~p~n", [Type]),
+    NewCache = gb_trees:enter(Type, Val, Cache),
+    case gb_trees:size(NewCache) >= ?CACHE_SIZE of
+        true ->
+            gen_server:cast(self(), decrease_cache);
+        false ->
+            ok
+    end,
+    {State#state{cache = NewCache}, Ret}.
+
+do_update({search, Key}) ->
+    db_frontend:search(Key);
+
+do_update(top) ->
+    db_frontend:today_top().
+
+%% True when Type is cached and not yet out of date.
+is_fresh(Type, Cache) ->
+    case gb_trees:lookup(Type, Cache) of
+        {value, {Start, _}} ->
+            not is_outofdate(Start);
+        none ->
+            false
+    end.
+
+%% timer:now_diff/2 yields microseconds; ?OUT_OF_DATE is in milliseconds.
+is_outofdate(Time) ->
+    (timer:now_diff(os:timestamp(), Time) div 1000) > ?OUT_OF_DATE.
+
+%% Drop the ?CACHE_SIZE div 2 oldest entries (or all of them when fewer are
+%% present) and rebuild the tree from the remainder.
+remove_oldest(Cache) ->
+    io:format("decrease cache ~p to ~p~n", [?CACHE_SIZE, ?CACHE_SIZE div 2]),
+    Sorted = lists:sort(fun compare_keyval/2, gb_trees:to_list(Cache)),
+    Drop = erlang:min(?CACHE_SIZE div 2, length(Sorted)),
+    Rest = lists:nthtail(Drop, Sorted),
+    lists:foldl(fun({Key, Val}, Tree) ->
+                    gb_trees:insert(Key, Val, Tree)
+                end, gb_trees:empty(), Rest).
+
+%% Order cache entries by fetch time, oldest first.
+compare_keyval({_, {T1, _}}, {_, {T2, _}}) ->
+    T1 =< T2.
diff --git a/src/http_front/http_handler.erl b/src/http_front/http_handler.erl index efc7a91..9f24b88 100644 --- a/src/http_front/http_handler.erl +++ b/src/http_front/http_handler.erl @@ -33,7 +33,7 @@ top(SessionID, _Env, _Input) -> mod_esi:deliver(SessionID, [?CONTENT_TYPE, Response]). today_top(SessionID, _Env, _Input) -> - Rets = db_frontend:today_top(), + Rets = http_cache:today_top(), BodyList = format_search_result(Rets), Body = ?TEXT("
    ~s
", [lists:flatten(BodyList)]), Response = simple_html("today_top", Body), @@ -90,7 +90,7 @@ test_search(Keyword) -> file:write_file(Filename, simple_html(Keyword, Body)). do_search(Keyword) -> - {Rets, Stats} = db_frontend:search(Keyword), + {Rets, Stats} = http_cache:search(Keyword), {_Found, Cost, Scanned} = Stats, Tip = ?TEXT("

search ~s, ~b results, ~f seconds

", [Keyword, Scanned, Cost / 1000 / 1000]),