mirror of
https://github.com/btdig/dhtcrawler2.git
synced 2025-01-31 10:31:37 +00:00
Add a new force-to-string log function (LOG_STR) and add logging to httpd, so that Unicode characters can be written to the log files
This commit is contained in:
parent
3b0e5701c8
commit
2a9f99940a
@ -29,4 +29,8 @@
|
|||||||
|
|
||||||
-define(FMT(S, A), lists:flatten(io_lib:format(S, A))).
|
-define(FMT(S, A), lists:flatten(io_lib:format(S, A))).
|
||||||
|
|
||||||
|
% force to string: X is formatted with ~s, so it must already be a string (or binary)
|
||||||
|
-define(LOG_STR(Lvl, X),
|
||||||
|
vlog:format(Lvl, "~s [~s] {~p, ~p}: ~s~n",
|
||||||
|
[?LVLS(Lvl), vlog:time_string(), ?MODULE, ?LINE, X])).
|
||||||
-endif.
|
-endif.
|
||||||
|
@ -13,13 +13,16 @@
|
|||||||
-define(TEXT(Fmt, Args), lists:flatten(io_lib:format(Fmt, Args))).
|
-define(TEXT(Fmt, Args), lists:flatten(io_lib:format(Fmt, Args))).
|
||||||
-define(MAX_FILE, 3).
|
-define(MAX_FILE, 3).
|
||||||
-define(CONTENT_TYPE, "Content-Type: application/json\r\n\r\n").
|
-define(CONTENT_TYPE, "Content-Type: application/json\r\n\r\n").
|
||||||
|
-include("vlog.hrl").
|
||||||
|
|
||||||
% search?q=keyword
|
% search?q=keyword
|
||||||
search(SessionID, _Env, Input) ->
|
search(SessionID, Env, Input) ->
|
||||||
Res = case http_common:get_search_keyword(Input) of
|
Res = case http_common:get_search_keyword(Input) of
|
||||||
[] ->
|
[] ->
|
||||||
"{\"error\":\"null input\", \"suggest\":\"api/search?q=keyword\"}";
|
"{\"error\":\"null input\", \"suggest\":\"api/search?q=keyword\"}";
|
||||||
Keyword ->
|
Keyword ->
|
||||||
|
US = http_common:list_to_utf_binary(Keyword),
|
||||||
|
?LOG_STR(?INFO, ?FMT("API: remote ~p search /~s/", [http_common:remote_addr(Env), US])),
|
||||||
do_search(Keyword)
|
do_search(Keyword)
|
||||||
end,
|
end,
|
||||||
mod_esi:deliver(SessionID, [?CONTENT_TYPE, Res]).
|
mod_esi:deliver(SessionID, [?CONTENT_TYPE, Res]).
|
||||||
@ -48,6 +51,9 @@ stats(SessionID, _Env, _Input) ->
|
|||||||
do_search(Keyword) ->
|
do_search(Keyword) ->
|
||||||
{Rets, Stats} = http_cache:search(Keyword),
|
{Rets, Stats} = http_cache:search(Keyword),
|
||||||
{_Found, Cost, Scanned} = Stats,
|
{_Found, Cost, Scanned} = Stats,
|
||||||
|
CostSecs = Cost / 1000 / 1000,
|
||||||
|
US = http_common:list_to_utf_binary(Keyword),
|
||||||
|
?LOG_STR(?INFO, ?FMT("API: search /~s/ found ~p, cost ~f secs", [US, Scanned, CostSecs])),
|
||||||
Tip = ?TEXT("{\"keyword\":\"~s\",\"found\":~p,\"cost\":~p,", [Keyword, Scanned, Cost div 1000]),
|
Tip = ?TEXT("{\"keyword\":\"~s\",\"found\":~p,\"cost\":~p,", [Keyword, Scanned, Cost div 1000]),
|
||||||
BodyList = format_search_result(Rets),
|
BodyList = format_search_result(Rets),
|
||||||
Body = ?TEXT("\"results\":[~s]}", [BodyList]),
|
Body = ?TEXT("\"results\":[~s]}", [BodyList]),
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
page_temp/0,
|
page_temp/0,
|
||||||
stop/0]).
|
stop/0]).
|
||||||
-record(state, {html_temp, httpid}).
|
-record(state, {html_temp, httpid}).
|
||||||
|
-include("vlog.hrl").
|
||||||
|
|
||||||
% start from command line, erl -run crawler_http start localhost 27017 8000 5
|
% start from command line, erl -run crawler_http start localhost 27017 8000 5
|
||||||
start([DBHostS, DBPortS, PortS, PoolSizeS]) ->
|
start([DBHostS, DBPortS, PortS, PoolSizeS]) ->
|
||||||
@ -27,6 +28,8 @@ start([DBHostS, DBPortS, PortS, PoolSizeS]) ->
|
|||||||
start(DBHost, DBPort, HttpPort, PoolSize).
|
start(DBHost, DBPort, HttpPort, PoolSize).
|
||||||
|
|
||||||
start(DBHost, DBPort, Port, PoolSize) ->
|
start(DBHost, DBPort, Port, PoolSize) ->
|
||||||
|
filelib:ensure_dir("log/"),
|
||||||
|
vlog:start_link("log/crawler_http.log", ?INFO),
|
||||||
code:add_path("deps/bson/ebin"),
|
code:add_path("deps/bson/ebin"),
|
||||||
code:add_path("deps/mongodb/ebin"),
|
code:add_path("deps/mongodb/ebin"),
|
||||||
Apps = [crypto, public_key, ssl, inets, bson, mongodb],
|
Apps = [crypto, public_key, ssl, inets, bson, mongodb],
|
||||||
@ -46,6 +49,7 @@ srv_name() ->
|
|||||||
crawler_http.
|
crawler_http.
|
||||||
|
|
||||||
init([DBHost, DBPort, Port, PoolSize]) ->
|
init([DBHost, DBPort, Port, PoolSize]) ->
|
||||||
|
?I(?FMT("httpd startup ~p", [Port])),
|
||||||
process_flag(trap_exit, true),
|
process_flag(trap_exit, true),
|
||||||
db_frontend:start(DBHost, DBPort, PoolSize),
|
db_frontend:start(DBHost, DBPort, PoolSize),
|
||||||
http_cache:start_link(),
|
http_cache:start_link(),
|
||||||
|
@ -6,8 +6,13 @@
|
|||||||
-module(http_common).
|
-module(http_common).
|
||||||
-export([get_search_keyword/1,
|
-export([get_search_keyword/1,
|
||||||
get_view_hash/1,
|
get_view_hash/1,
|
||||||
|
remote_addr/1,
|
||||||
|
list_to_utf_binary/1,
|
||||||
sort_file_by_size/1]).
|
sort_file_by_size/1]).
|
||||||
|
|
||||||
|
%% Look up the `remote_addr' entry of the Env proplist.
%% Returns the stored value, or `undefined' when Env has no such entry.
remote_addr(Env) ->
    proplists:get_value(remote_addr, Env, undefined).
|
||||||
|
|
||||||
get_search_keyword(Input) ->
|
get_search_keyword(Input) ->
|
||||||
get_q_arg(Input).
|
get_q_arg(Input).
|
||||||
|
|
||||||
@ -28,3 +33,13 @@ sort_file_by_size(Files) ->
|
|||||||
lists:sort(fun({_, L1}, {_, L2}) ->
|
lists:sort(fun({_, L1}, {_, L2}) ->
|
||||||
L1 > L2
|
L1 > L2
|
||||||
end, Files).
|
end, Files).
|
||||||
|
|
||||||
|
%% Convert an iolist of bytes into a UTF-8 encoded binary.
%%
%% Usage:
%%   io:format("~ts", [list_to_utf_binary(L)])
%%   io:format(FP, "~s", [list_to_utf_binary(L)])
%%
%% When the bytes already form valid UTF-8 they are returned unchanged.
%% When they do not (truncated or malformed sequences), each byte is
%% treated as a Latin-1 character and re-encoded, so the result is always
%% valid UTF-8 and the caller never crashes just because it wanted to log
%% the value.
%%
%% list_to_binary/1 still raises badarg if L contains integers above 255.
list_to_utf_binary(L) ->
    Bytes = list_to_binary(L),
    %% utf8 -> utf8 conversion acts as a validation pass: it yields a binary
    %% on success and an {error, _, _} / {incomplete, _, _} tuple otherwise.
    case unicode:characters_to_binary(Bytes, utf8, utf8) of
        Utf8 when is_binary(Utf8) ->
            Utf8;
        _NotUtf8 ->
            %% Latin-1 maps every byte to a code point, so this cannot fail.
            unicode:characters_to_binary(Bytes, latin1, utf8)
    end.
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,12 +15,15 @@
|
|||||||
-define(TEXT(Fmt, Args), lists:flatten(io_lib:format(Fmt, Args))).
|
-define(TEXT(Fmt, Args), lists:flatten(io_lib:format(Fmt, Args))).
|
||||||
-import(torrent_file, [size_string/1]).
|
-import(torrent_file, [size_string/1]).
|
||||||
-define(CONTENT_TYPE, "Content-Type: text/html\r\n\r\n").
|
-define(CONTENT_TYPE, "Content-Type: text/html\r\n\r\n").
|
||||||
|
-include("vlog.hrl").
|
||||||
|
|
||||||
search(SessionID, _Env, Input) ->
|
search(SessionID, Env, Input) ->
|
||||||
{K, Body} = case http_common:get_search_keyword(Input) of
|
{K, Body} = case http_common:get_search_keyword(Input) of
|
||||||
[] ->
|
[] ->
|
||||||
{"", "invalid input"};
|
{"", "invalid input"};
|
||||||
Key ->
|
Key ->
|
||||||
|
US = http_common:list_to_utf_binary(Key),
|
||||||
|
?LOG_STR(?INFO, ?FMT("remote ~p search /~s/", [http_common:remote_addr(Env), US])),
|
||||||
{Key, do_search(Key)}
|
{Key, do_search(Key)}
|
||||||
end,
|
end,
|
||||||
Response = simple_html(K, Body),
|
Response = simple_html(K, Body),
|
||||||
@ -87,8 +90,11 @@ do_search(Keyword) when length(Keyword) =< 1 ->
|
|||||||
do_search(Keyword) ->
|
do_search(Keyword) ->
|
||||||
{Rets, Stats} = http_cache:search(Keyword),
|
{Rets, Stats} = http_cache:search(Keyword),
|
||||||
{_Found, Cost, Scanned} = Stats,
|
{_Found, Cost, Scanned} = Stats,
|
||||||
|
CostSecs = Cost / 1000 / 1000,
|
||||||
|
US = http_common:list_to_utf_binary(Keyword),
|
||||||
|
?LOG_STR(?INFO, ?FMT("search /~s/ found ~p, cost ~f secs", [US, Scanned, CostSecs])),
|
||||||
Tip = ?TEXT("<h4>search ~s, ~b results, ~f seconds</h4>",
|
Tip = ?TEXT("<h4>search ~s, ~b results, ~f seconds</h4>",
|
||||||
[Keyword, Scanned, Cost / 1000 / 1000]),
|
[Keyword, Scanned, CostSecs ]),
|
||||||
BodyList = format_search_result(Rets),
|
BodyList = format_search_result(Rets),
|
||||||
Body = ?TEXT("<ol>~s</ol>", [lists:flatten(BodyList)]),
|
Body = ?TEXT("<ol>~s</ol>", [lists:flatten(BodyList)]),
|
||||||
Tip ++ Body.
|
Tip ++ Body.
|
||||||
|
Loading…
Reference in New Issue
Block a user