mirror of
https://github.com/btdig/dhtcrawler2.git
synced 2025-01-19 12:41:36 +00:00
add http cache
This commit is contained in:
parent
bdaf17fec1
commit
720a680743
@ -47,6 +47,7 @@ srv_name() ->
|
|||||||
init([DBHost, DBPort, Port]) ->
|
init([DBHost, DBPort, Port]) ->
|
||||||
process_flag(trap_exit, true),
|
process_flag(trap_exit, true),
|
||||||
db_frontend:start(DBHost, DBPort, 2),
|
db_frontend:start(DBHost, DBPort, 2),
|
||||||
|
http_cache:start_link(),
|
||||||
{ok, Pid} = inets:start(httpd, [
|
{ok, Pid} = inets:start(httpd, [
|
||||||
{modules, [mod_alias, mod_auth, mod_esi, mod_actions,
|
{modules, [mod_alias, mod_auth, mod_esi, mod_actions,
|
||||||
mod_cgi, mod_dir, mod_get, mod_head, mod_log, mod_disk_log]},
|
mod_cgi, mod_dir, mod_get, mod_head, mod_log, mod_disk_log]},
|
||||||
|
124
src/http_front/http_cache.erl
Normal file
124
src/http_front/http_cache.erl
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
%%
|
||||||
|
%% http_cache.erl
|
||||||
|
%% Kevin Lynx
|
||||||
|
%% 07.03.2013
|
||||||
|
%%
|
||||||
|
-module(http_cache).
|
||||||
|
-behaviour(gen_server).
|
||||||
|
-export([init/1,
|
||||||
|
handle_call/3,
|
||||||
|
handle_cast/2,
|
||||||
|
handle_info/2,
|
||||||
|
terminate/2,
|
||||||
|
code_change/3]).
|
||||||
|
-export([start_link/0,
|
||||||
|
stop/0,
|
||||||
|
search/1,
|
||||||
|
today_top/0]).
|
||||||
|
-record(state, {cache}).
|
||||||
|
-define(OUT_OF_DATE, 5*60*1000).
|
||||||
|
-define(CACHE_SIZE, 1000).
|
||||||
|
|
||||||
|
%% Start the cache server linked to the calling process and register it
%% locally under the name returned by srv_name/0.
start_link() ->
    Name = {local, srv_name()},
    gen_server:start_link(Name, ?MODULE, [], []).
|
||||||
|
|
||||||
|
%% Ask the registered cache server to stop. This is a cast, so it returns
%% immediately without waiting for the server to terminate.
stop() ->
    Server = srv_name(),
    gen_server:cast(Server, stop).
|
||||||
|
|
||||||
|
%% Synchronously look up the search result for Key through the cache
%% server. The request is sent as {query, {search, Key}}.
search(Key) ->
    Request = {query, {search, Key}},
    gen_server:call(srv_name(), Request).
|
||||||
|
|
||||||
|
%% Synchronously fetch the `top' entry through the cache server.
%% The request is sent as {query, top}.
today_top() ->
    Request = {query, top},
    gen_server:call(srv_name(), Request).
|
||||||
|
|
||||||
|
%% Internal: ask the cache server (by registered name) to refresh the
%% `top' entry asynchronously via an {update, top} cast.
async_top() ->
    Server = srv_name(),
    gen_server:cast(Server, {update, top}).
|
||||||
|
|
||||||
|
%% gen_server callback: queue an asynchronous refresh of `top' and start
%% with an empty gb_trees cache.
init([]) ->
    async_top(),
    EmptyCache = gb_trees:empty(),
    {ok, #state{cache = EmptyCache}}.
|
||||||
|
|
||||||
|
%% Name under which the cache server is locally registered.
srv_name() -> http_cache.
|
||||||
|
|
||||||
|
%% gen_server callback: nothing to clean up; hands back {ok, State}.
terminate(_Reason, State) ->
    Result = {ok, State},
    Result.
|
||||||
|
|
||||||
|
%% gen_server callback for hot code upgrade. The arguments are
%% (OldVsn, State, Extra); the state is the SECOND argument and is returned
%% unchanged. The previous version returned the third argument (Extra) as
%% the new state.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
|
||||||
|
|
||||||
|
%% gen_server callback for asynchronous requests.
%%
%% decrease_cache  - shrink the cache via remove_oldest/1, then request a
%%                   refresh of `top' in case it was evicted.
%% {update, Type}  - refresh the entry for Type, but only if it is missing
%%                   or out of date. Several refresh requests for the same
%%                   key can be queued while one is pending; skipping the
%%                   redundant ones avoids repeating the backend query.
%% stop            - terminate the server normally.
handle_cast(decrease_cache, #state{cache = Cache} = State) ->
    NewCache = remove_oldest(Cache),
    async_top(), % make sure `top' exists
    {noreply, State#state{cache = NewCache}};

handle_cast({update, Type}, #state{cache = Cache} = State) ->
    case needs_refresh(Type, Cache) of
        true ->
            {NewState, _} = update(Type, State),
            {noreply, NewState};
        false ->
            {noreply, State}
    end;

handle_cast(stop, State) ->
    {stop, normal, State}.

%% True when Type has no cached entry or its entry is out of date.
needs_refresh(Type, Cache) ->
    case gb_trees:lookup(Type, Cache) of
        none ->
            true;
        {value, {Stamp, _Result}} ->
            is_outofdate(Stamp)
    end.
|
||||||
|
|
||||||
|
%% gen_server callback for synchronous requests.
%%
%% {query, Type} - answer from the cache (see query/2) and keep the
%%                 possibly updated state.
%% anything else - reply with {error, unknown_request}. Returning
%%                 `noreply' here would leave the caller blocked until its
%%                 gen_server:call timeout expires.
handle_call({query, Type}, _From, State) ->
    {NewState, Ret} = query(Type, State),
    {reply, Ret, NewState};

handle_call(_Request, _From, State) ->
    {reply, {error, unknown_request}, State}.
|
||||||
|
|
||||||
|
%% gen_server callback: any out-of-band message is ignored and the state
%% is left untouched.
handle_info(_Message, State) ->
    Unchanged = State,
    {noreply, Unchanged}.
|
||||||
|
|
||||||
|
%% Answer a query for Type. A key already present in the cache is served
%% by do_query/2; an unknown key is fetched and stored by update/2.
%% Returns {NewState, Result}.
query(Type, State) ->
    #state{cache = Tree} = State,
    Known = gb_trees:is_defined(Type, Tree),
    case Known of
        true ->
            do_query(Type, State);
        false ->
            update(Type, State)
    end.
|
||||||
|
|
||||||
|
%% Fetch a fresh result for Type via do_update/1 and store it in the cache
%% as {Timestamp, Result}. When the cache has reached ?CACHE_SIZE entries a
%% `decrease_cache' cast is sent to this process so the trim happens later.
%% Returns {NewState, Result}.
%%
%% The timestamp comes from os:timestamp/0 rather than the deprecated
%% now/0; both yield {MegaSecs, Secs, MicroSecs}, so the stored value stays
%% compatible with timer:now_diff/2.
update(Type, #state{cache = Cache} = State) ->
    Ret = do_update(Type),
    Val = {os:timestamp(), Ret},
    io:format("update cache ~p~n", [Type]),
    NewCache = gb_trees:enter(Type, Val, Cache),
    case gb_trees:size(NewCache) >= ?CACHE_SIZE of
        true ->
            gen_server:cast(self(), decrease_cache);
        false ->
            ok
    end,
    {State#state{cache = NewCache}, Ret}.
|
||||||
|
|
||||||
|
%% Fetch the uncached value for a cache key from db_frontend:
%% `top' maps to today_top/0 and {search, Keyword} to search/1.
do_update(top) ->
    db_frontend:today_top();

do_update({search, Keyword}) ->
    db_frontend:search(Keyword).
|
||||||
|
|
||||||
|
%% Serve Type from the cache. The stored value is returned even when it is
%% out of date; in that case an {update, Type} cast is sent to this process
%% so the entry is refreshed afterwards. The state is returned unchanged.
do_query(Type, #state{cache = Tree} = State) ->
    {Stamp, Cached} = gb_trees:get(Type, Tree),
    Stale = is_outofdate(Stamp),
    if
        Stale ->
            gen_server:cast(self(), {update, Type});
        true ->
            ok
    end,
    {State, Cached}.
|
||||||
|
|
||||||
|
%% True when more than ?OUT_OF_DATE milliseconds have passed since Time,
%% a {MegaSecs, Secs, MicroSecs} timestamp. timer:now_diff/2 returns
%% microseconds, hence the division by 1000.
%%
%% Uses os:timestamp/0 instead of the deprecated now/0; both return the
%% same tuple shape.
is_outofdate(Time) ->
    ElapsedMs = timer:now_diff(os:timestamp(), Time) div 1000,
    ElapsedMs > ?OUT_OF_DATE.
|
||||||
|
|
||||||
|
%% Shrink Cache to its newest `?CACHE_SIZE div 2' entries, judged by the
%% timestamp stored in each value ({Timestamp, Result}).
%%
%% A cache that already holds that many entries or fewer is returned
%% unchanged. Several `decrease_cache' requests can be queued back to back,
%% and the previous lists:split/2 call raised badarg once the cache held
%% fewer than `?CACHE_SIZE div 2' entries.
remove_oldest(Cache) ->
    Keep = ?CACHE_SIZE div 2,
    case gb_trees:size(Cache) - Keep of
        Excess when Excess > 0 ->
            io:format("decrease cache ~p to ~p~n", [?CACHE_SIZE, Keep]),
            KeyVals = gb_trees:to_list(Cache),
            %% Oldest entries first, so dropping the head drops the oldest.
            Sorted = lists:sort(fun compare_keyval/2, KeyVals),
            Newest = lists:nthtail(Excess, Sorted),
            lists:foldl(fun({Key, Val}, Tree) ->
                                gb_trees:insert(Key, Val, Tree)
                        end, gb_trees:empty(), Newest);
        _ ->
            Cache
    end.
|
||||||
|
|
||||||
|
%% Ordering fun for lists:sort/2 over {Key, {Timestamp, Result}} pairs:
%% true when the first pair's timestamp is not later than the second's.
compare_keyval({_KeyA, {StampA, _}}, {_KeyB, {StampB, _}}) ->
    not (StampA > StampB).
|
@ -33,7 +33,7 @@ top(SessionID, _Env, _Input) ->
|
|||||||
mod_esi:deliver(SessionID, [?CONTENT_TYPE, Response]).
|
mod_esi:deliver(SessionID, [?CONTENT_TYPE, Response]).
|
||||||
|
|
||||||
today_top(SessionID, _Env, _Input) ->
|
today_top(SessionID, _Env, _Input) ->
|
||||||
Rets = db_frontend:today_top(),
|
Rets = http_cache:today_top(),
|
||||||
BodyList = format_search_result(Rets),
|
BodyList = format_search_result(Rets),
|
||||||
Body = ?TEXT("<ol>~s</ol>", [lists:flatten(BodyList)]),
|
Body = ?TEXT("<ol>~s</ol>", [lists:flatten(BodyList)]),
|
||||||
Response = simple_html("today_top", Body),
|
Response = simple_html("today_top", Body),
|
||||||
@ -90,7 +90,7 @@ test_search(Keyword) ->
|
|||||||
file:write_file(Filename, simple_html(Keyword, Body)).
|
file:write_file(Filename, simple_html(Keyword, Body)).
|
||||||
|
|
||||||
do_search(Keyword) ->
|
do_search(Keyword) ->
|
||||||
{Rets, Stats} = db_frontend:search(Keyword),
|
{Rets, Stats} = http_cache:search(Keyword),
|
||||||
{_Found, Cost, Scanned} = Stats,
|
{_Found, Cost, Scanned} = Stats,
|
||||||
Tip = ?TEXT("<h4>search ~s, ~b results, ~f seconds</h4>",
|
Tip = ?TEXT("<h4>search ~s, ~b results, ~f seconds</h4>",
|
||||||
[Keyword, Scanned, Cost / 1000 / 1000]),
|
[Keyword, Scanned, Cost / 1000 / 1000]),
|
||||||
|
Loading…
Reference in New Issue
Block a user