mirror of
https://github.com/btdig/dhtcrawler2.git
synced 2025-01-31 10:31:37 +00:00
integrate cache_index for hash_reader, default is disabled
This commit is contained in:
parent
a0e1be291d
commit
b5692682e2
@ -22,8 +22,8 @@ do_insert(Conn, Hash) ->
|
||||
|
||||
%% @doc Tells whether a document whose `_id' equals Hash can be found in
%% ?COLLNAME of database ?DBNAME.
%%
%% Hash must be a string (list); it is turned into a binary to build the
%% `_id' selector. Returns `true' when the lookup result differs from the
%% empty tuple `{}', `false' otherwise.
%%
%% NOTE(review): assumes mongo:do/5 hands back the fun's result unchanged and
%% that mongo:find_one/2 yields `{}' for "no match" -- confirm against the
%% driver version in use.
exist(Conn, Hash) when is_list(Hash) ->
    Sel = {'_id', list_to_binary(Hash)},
    Found = mongo:do(safe, master, Conn, ?DBNAME, fun() ->
        mongo:find_one(?COLLNAME, Sel)
    end),
    %% Compare the raw result instead of destructuring it with `{Doc} = ...':
    %% that pattern cannot match `{}', so a miss would crash with badmatch
    %% rather than answer `false'.
    Found =/= {}.
|
||||
|
||||
|
@ -41,6 +41,11 @@ terminate(_, State) ->
|
||||
%% @doc Code-upgrade callback. No state migration is performed: the first two
%% arguments are ignored and the state is handed back untouched as
%% `{ok, State}'.
code_change(_OldVsn, _Extra, State) -> {ok, State}.
|
||||
|
||||
%% `filter_torrent' message: obtain the connection through db_conn/1 and ask
%% try_next_download/1 to move on to the next hash. The state is returned
%% unchanged.
handle_info(filter_torrent, State) ->
    try_next_download(db_conn(State)),
    {noreply, State};
|
||||
|
||||
handle_info({got_torrent, failed, _Hash}, State) ->
|
||||
#state{downloading = D} = State,
|
||||
Conn = db_conn(State),
|
||||
@ -87,7 +92,7 @@ handle_info(process_download_hash, State) ->
|
||||
try_next_download(Conn),
|
||||
% until the max downloader count reaches
|
||||
timer:send_after(?DOWNLOAD_INTERVAL, process_download_hash),
|
||||
D + 1
|
||||
D % + 1, bug here ?
|
||||
end,
|
||||
{noreply, State#state{downloading = NewD}};
|
||||
|
||||
@ -206,7 +211,28 @@ insert_to_download_wait(Conn, Doc) ->
|
||||
|
||||
%% @doc Fetches the next document from ?HASH_DOWNLOAD_COLL through
%% load_delete_next/2 and hands it to check_in_index_cache/2, which decides
%% whether the hash is scheduled for download or dropped.
%%
%% The document is no longer passed straight to schedule_next/2 here; doing so
%% would end the function before the cache-index check and skip it entirely.
try_next_download(Conn) ->
    Doc = load_delete_next(Conn, ?HASH_DOWNLOAD_COLL),
    check_in_index_cache(Conn, Doc).
|
||||
|
||||
%% @doc Decides what happens to a document taken from the download queue.
%%
%% `{}' (no document): nothing is done and `ok' is returned.
%% `{Doc}': the `hash' field is read, converted to a string, and passed to
%% should_try_download/3 together with the `check_cache' config flag
%% (default `false').
%%   - `true'  -> the document is handed to schedule_next/2.
%%   - `false' -> the hash is counted as cache-filtered and a `filter_torrent'
%%                message is sent to this process so the next hash is tried.
check_in_index_cache(_Conn, {}) ->
    ok;
check_in_index_cache(Conn, {Doc}) ->
    {HashBin} = bson:lookup(hash, Doc),
    HashStr = binary_to_list(HashBin),
    CheckCache = config:get(check_cache, false),
    case should_try_download(CheckCache, Conn, HashStr) of
        false ->
            %% The hash is absent from the local cache index, so it may well
            %% be missing on the server too: skip it and move on.
            hash_reader_stats:handle_cache_filtered(),
            self() ! filter_torrent;
        true ->
            schedule_next({Doc}, true)
    end.
|
||||
|
||||
%% @doc Tells whether a hash is worth a download attempt.
%%
%% With the first argument `false' the answer is always `true' and the other
%% arguments are ignored. With `true' the answer is whatever
%% db_hash_index:exist/2 returns for the given connection and hash.
should_try_download(false, _Conn, _ListHash) ->
    true;
should_try_download(true, Conn, ListHash) ->
    db_hash_index:exist(Conn, ListHash).
|
||||
|
||||
% if there's no hash, try `wait_download'
|
||||
try_next(Conn) ->
|
||||
|
@ -19,9 +19,10 @@
|
||||
handle_used_cache/0,
|
||||
handle_download_ok/0,
|
||||
handle_download_failed/0,
|
||||
handle_cache_filtered/0,
|
||||
dump/0]).
|
||||
%% State record carried through this module's callbacks.
%% Every counter starts at 0; tref, count, start and name have no default.
%% Each field is declared exactly once (a repeated field name is a compile
%% error).
-record(state, {
    tref,
    count,
    start,
    name,
    cache_used = 0,
    updated = 0,
    inserted = 0,
    cache_filtered = 0,    % incremented by the inc_cache_filtered cast
    download_ok = 0,
    download_failed = 0    % incremented by the inc_download_failed cast
}).
|
||||
%% 10*60*1000 = 600000. NOTE(review): presumably a period in milliseconds
%% (i.e. ten minutes) -- confirm at the place where the macro is used.
-define(STATS_INTERVAL, 10*60*1000).
|
||||
%% ?TEXT(Fmt, Arg): formats Arg according to Fmt with io_lib:format/2 and
%% flattens the result with lists:flatten/1, yielding a flat character list.
-define(TEXT(Fmt, Arg), lists:flatten(io_lib:format(Fmt, Arg))).
|
||||
@ -53,6 +54,9 @@ handle_download_ok() ->
|
||||
%% @doc Sends the `inc_download_failed' cast to the server named by
%% srv_name/0. Returns the result of gen_server:cast/2.
handle_download_failed() ->
    Server = srv_name(),
    gen_server:cast(Server, inc_download_failed).
|
||||
|
||||
%% @doc Sends the `inc_cache_filtered' cast to the server named by
%% srv_name/0. Returns the result of gen_server:cast/2.
handle_cache_filtered() ->
    Server = srv_name(),
    gen_server:cast(Server, inc_cache_filtered).
|
||||
|
||||
%% @doc Name used to address the server in gen_server:cast/2 calls: the
%% module's own name.
srv_name() -> ?MODULE.
|
||||
|
||||
@ -99,6 +103,10 @@ handle_cast(inc_download_ok, State) ->
|
||||
%% `inc_download_failed' cast: add one to the download_failed counter and
%% carry on without a reply.
handle_cast(inc_download_failed, State) ->
    Failed = State#state.download_failed,
    {noreply, State#state{download_failed = Failed + 1}};
|
||||
|
||||
%% `inc_cache_filtered' cast: add one to the cache_filtered counter and carry
%% on without a reply.
handle_cast(inc_cache_filtered, State) ->
    Filtered = State#state.cache_filtered,
    {noreply, State#state{cache_filtered = Filtered + 1}};
|
||||
|
||||
%% `stop' cast: request a normal shutdown, leaving the state as it is.
handle_cast(stop, State) -> {stop, normal, State}.
|
||||
@ -131,6 +139,7 @@ date_string() ->
|
||||
|
||||
format_stats(State) ->
|
||||
#state{count = C, start = Start, cache_used = Cache,
|
||||
cache_filtered = CacheFiltered,
|
||||
download_ok = DO, download_failed = DF,
|
||||
updated = U, inserted = I} = State,
|
||||
{Day, {H, M, S}} = stats_time(Start),
|
||||
@ -146,6 +155,7 @@ format_stats(State) ->
|
||||
?TEXT(" Download torrents speed ~p tor/min~n", [I div TotalMins]) ++
|
||||
?TEXT(" Download success ~p~n", [DO]) ++
|
||||
?TEXT(" Download failed ~p~n", [DF]) ++
|
||||
?TEXT(" Cache Index Filtered ~p~n", [CacheFiltered]) ++
|
||||
?TEXT(" Updated ~p~n", [U]) ++
|
||||
?TEXT(" Inserted ~p~n", [I]) ++
|
||||
?TEXT(" Inserted percentage ~.2f%~n", [InsertPercent]) ++
|
||||
|
@ -74,4 +74,5 @@ config_default() ->
|
||||
{save_to_file, true},
|
||||
{load_from_db, false},
|
||||
{text_seg, simple},
|
||||
{check_cache, false},
|
||||
{torrent_path, "torrents/"}].
|
||||
|
Loading…
Reference in New Issue
Block a user