mirror of
https://github.com/btdig/dhtcrawler2.git
synced 2025-02-23 21:59:04 +00:00
add progress displaying for cache_indexer
This commit is contained in:
parent
062130cbd9
commit
a0e1be291d
@ -6,6 +6,7 @@
|
||||
-module(index_file).
|
||||
-export([start/2]).
|
||||
-export([worker_run/3]).
|
||||
%% Minimum distance, in file-position units, between two progress reports;
%% worker_run/3 uses it as the lower bound for its progress step.
%% NOTE(review): the factor 41 presumably is one 40-character hex hash plus a
%% line feed, making this roughly 5000 lines -- confirm against the input format.
-define(PRINT_STEP, 5000*41).
|
||||
|
||||
%% Spawn a linked worker process that runs worker_run/3 on FileName.
%% The calling process is passed as the worker's parent; Conn is handed
%% through unchanged. Returns the pid of the new worker.
start(Conn, FileName) ->
    Parent = self(),
    WorkerArgs = [Parent, Conn, FileName],
    spawn_link(?MODULE, worker_run, WorkerArgs).
|
||||
@ -13,6 +14,7 @@ start(Conn, FileName) ->
|
||||
load_position(Name) ->
|
||||
StatusFile = Name ++ ".sta",
|
||||
Pos = case file:consult(StatusFile) of
|
||||
{ok, []} -> 0;
|
||||
{ok, [Status]} ->
|
||||
proplists:get_value(position, Status);
|
||||
{error, _} ->
|
||||
@ -29,22 +31,32 @@ worker_run(Parent, Conn, FileName) ->
|
||||
Pos = load_position(FileName),
|
||||
io:format("start to process ~s from ~p~n", [FileName, Pos]),
|
||||
{ok, FP} = file:open(FileName, [read]),
|
||||
{ok, MaxPos} = file:position(FP, eof),
|
||||
file:position(FP, Pos),
|
||||
Sum = process_hash(Conn, FileName, FP),
|
||||
Step = MaxPos div 10, % every 10% to display progress
|
||||
CheckStep = if Step < ?PRINT_STEP -> ?PRINT_STEP; true -> Step end,
|
||||
Sum = process_hash(Conn, FileName, FP, Pos, MaxPos, CheckStep),
|
||||
Parent ! {worker_done, self(), FileName},
|
||||
file:close(FP),
|
||||
io:format("Index file ~s done, ~p hashes~n", [FileName, Sum]).
|
||||
|
||||
process_hash(Conn, FileName, FP) ->
|
||||
process_hash(Conn, FileName, FP, PrintPos, MaxPos, CheckStep) ->
|
||||
case io:get_line(FP, "") of
|
||||
eof -> 0;
|
||||
Line ->
|
||||
save_hash(Conn, strip_lf(Line)),
|
||||
{ok, Pos} = file:position(FP, cur),
|
||||
save_position(FileName, Pos),
|
||||
1 + process_hash(Conn, FileName, FP)
|
||||
NewPrintPos = check_progress(FileName, Pos, PrintPos, MaxPos, CheckStep),
|
||||
1 + process_hash(Conn, FileName, FP, NewPrintPos, MaxPos, CheckStep)
|
||||
end.
|
||||
|
||||
%% Report progress once the current offset has reached the next checkpoint.
%%
%% When Pos >= PrintPos the offset is stored with save_position/2, the
%% percentage of MaxPos reached so far is printed, and the following
%% checkpoint (PrintPos + CheckStep) is returned. Otherwise nothing is
%% stored or printed and PrintPos is returned unchanged.
check_progress(FileName, Pos, PrintPos, MaxPos, CheckStep) ->
    case Pos >= PrintPos of
        true ->
            save_position(FileName, Pos),
            Percent = (100 * Pos) div MaxPos,
            io:format("~s -> ~b%~n", [FileName, Percent]),
            PrintPos + CheckStep;
        false ->
            PrintPos
    end.
|
||||
|
||||
%% Remove a single trailing line feed from the string S, if there is one.
%%
%% io:get_line/2 returns the final line of a file without "\n" when the file
%% does not end in a newline; such a line (and the empty string) is returned
%% unchanged instead of losing its last character.
strip_lf(S) ->
    case lists:reverse(S) of
        [$\n | RevRest] -> lists:reverse(RevRest);
        _NoTrailingLf -> S
    end.
|
||||
|
||||
|
34
tools/download_sync.bat
Normal file
34
tools/download_sync.bat
Normal file
@ -0,0 +1,34 @@
|
||||
wget http://torrage.com/sync/201201.txt
|
||||
wget http://torrage.com/sync/201202.txt
|
||||
wget http://torrage.com/sync/201203.txt
|
||||
wget http://torrage.com/sync/201204.txt
|
||||
wget http://torrage.com/sync/201205.txt
|
||||
wget http://torrage.com/sync/201206.txt
|
||||
wget http://torrage.com/sync/201207.txt
|
||||
wget http://torrage.com/sync/201208.txt
|
||||
wget http://torrage.com/sync/201209.txt
|
||||
wget http://torrage.com/sync/201210.txt
|
||||
wget http://torrage.com/sync/201211.txt
|
||||
wget http://torrage.com/sync/201212.txt
|
||||
wget http://torrage.com/sync/201301.txt
|
||||
wget http://torrage.com/sync/201302.txt
|
||||
wget http://torrage.com/sync/201303.txt
|
||||
wget http://torrage.com/sync/201304.txt
|
||||
wget http://torrage.com/sync/201305.txt
|
||||
wget http://torrage.com/sync/201306.txt
|
||||
wget http://torrage.com/sync/20130701.txt
|
||||
wget http://torrage.com/sync/20130702.txt
|
||||
wget http://torrage.com/sync/20130703.txt
|
||||
wget http://torrage.com/sync/20130704.txt
|
||||
wget http://torrage.com/sync/20130705.txt
|
||||
wget http://torrage.com/sync/20130706.txt
|
||||
wget http://torrage.com/sync/20130707.txt
|
||||
wget http://torrage.com/sync/20130708.txt
|
||||
wget http://torrage.com/sync/20130709.txt
|
||||
wget http://torrage.com/sync/20130710.txt
|
||||
wget http://torrage.com/sync/20130711.txt
|
||||
wget http://torrage.com/sync/20130712.txt
|
||||
wget http://torrage.com/sync/20130713.txt
|
||||
wget http://torrage.com/sync/20130714.txt
|
||||
wget http://torrage.com/sync/20130715.txt
|
||||
|
Loading…
Reference in New Issue
Block a user