nginx 日志解析
设置nginx日志格式默认变量格式:log_format combined '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"';$remote_addr变量:记录了客户端的IP地址(普通情况下)。$remote_user变
Nginx是一个高性能的HTTP和反向代理服务器。Nginx access日志记录了web应用的访问记录。大致记录了访问方式(POST/GET)、客户端IP、远程用户、请求时间、请求状态码、访问host地址、请求页面大小、referer信息、x_forwarded_for地址等等。nginx access日志的格式不是一成不变的,是可以自定义的。Nginx access具体日志格式与在服务器的存储位置可以查看nginx.conf配置文件。Nginx详细记录了每一次web请求。
设置nginx日志格式
默认变量格式:log_format combined '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"';
$remote_addr变量:记录了客户端的IP地址(普通情况下)。
$remote_user变量:当nginx开启了用户认证功能后,此变量记录了客户端使用了哪个用户进行了认证。
$time_local变量:记录了当前日志条目的时间。
$request变量:记录了当前http请求的方法、url和http协议版本。
$status变量:记录了当前http请求的响应状态,即响应的状态码,比如200、404等响应码,都记录在此变量中。
$body_bytes_sent变量:记录了nginx响应客户端请求时,发送到客户端的字节数,不包含响应头的大小。
$http_referer变量:记录了当前请求是从哪个页面过来的,比如你点了A页面中的超链接才产生了这个请求,那么此变量中就记录了A页面的url。
$http_user_agent变量:记录了客户端的软件信息,比如,浏览器的名称和版本号。
增加变量:
'"$http_host" "$request_time" "$upstream_response_time" "$upstream_connect_time" "$upstream_header_time"';
$http_host 请求地址,即浏览器中你输入的地址(IP或域名)
$request_time:处理请求的总时间,包含了用户数据接收时间
$upstream_response_time:建立连接和从上游服务器接收响应主体的最后一个字节之间的时间
$upstream_connect_time:花费在与上游服务器建立连接上的时间
$upstream_header_time:建立连接和从上游服务器接收响应头的第一个字节之间的时间
修改后的自定义格式:
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" '
'"$http_host" "$request_time" "$upstream_response_time" "$upstream_connect_time" "$upstream_header_time"';
Nginx的详细配置 参考:nginx短篇(6):访问日志-朱双印博客
解析nginx日志
日志内容:
192.168.100.175 - - [14/Jun/2020:20:18:59 +0800] "POST /mall_open_api.php?token=9f1063d6bf1ec1 HTTP/1.0" 300 1829 "-" "Dart/2.8 (dart:io)" "-" "mall-api.carisok.com" "0.556" "0.556" "0.000" "0.556"
192.168.100.173 - - [14/Jun/2020:20:18:59 +0800] "POST /mall_open_api.php?token=a3a9c69 HTTP/1.0" 200 1829 "-" "Dart/2.8 (dart:io)" "-" "mall-api.carisok.com" "0.446" "0.445" "0.000" "0.445"
192.168.100.175 - - [14/Jun/2020:20:19:03 +0800] "POST /mall_open_api.php?token=b9b0eadf7 HTTP/1.0" 200 731 "-" "Dart/2.8 (dart:io)" "-" "mall-api.carisok.com" "0.153" "0.154" "0.000" "0.154"
192.168.100.176 - - [14/Jun/2020:20:19:14 +0800] "POST /mall_open_api.php?token=242b947c HTTP/1.0" 200 97 "-" "Dart/2.8 (dart:io)" "-" "mall-api.carisok.com" "0.425" "0.425" "0.000" "0.425"
日志解析:使用正则表达式,通过 Java 的 Pattern 和 Matcher 类捕获日志中的各个变量
String pattern = "(?<ip>\\d+\\.\\d+\\.\\d+\\.\\d+)(?<datetime> - - \\[(.*?)])(?<t1>\\s[\\\\\"]+)(?<requestMethod>[A-Z[/url]]+)(?<t2> )(?<requestUrl>\\S+\\s+)(?<protocol>\\S+\")(?<status> \\d+)(?<bytes> \\d+)" +
"(?<referer> \"(.*?)\")(?<agent> \"(.*?)\")(?<forwarded> \"(.*?)\")(?<host> \"(.*?)\")" +
"(?<requestTime> \"(.*?)\")(?<responseTime> \"(.*?)\")(?<connectTime> \"(.*?)\")(?<headerTime> \"(.*?)\")"
实现代码: 先逐行读取文件中的日志,存储到list中,然后通过并行计算框架 ForkJoin 解析list中每一个日志信息并合并相同URI的统计信息。最终得出每条URI当天的访问统计数据(总的访问次数、成功次数、平均访问响应时间等)
/**
 * Aggregates per-URL statistics from an nginx access log.
 *
 * <p>The log file is read line by line into a list, the list is split
 * recursively by a {@link RecursiveTask} running on a {@link ForkJoinPool},
 * every line is parsed into a {@link RequestMsg}, and the partial results are
 * merged per request URL (total visits, successful visits, min/max/average
 * request and upstream response times of the HTTP 200 requests).
 */
public class forkJoin_log {

    /** Log file analysed when no path is given on the command line. */
    private static final String DEFAULT_LOG_FILE =
            "C:\\Users\\Administrator\\Desktop\\log\\access_20200615.log";

    /**
     * One access-log line in the customised "main" format. Most groups keep the
     * leading blank (and the surrounding quotes) of their field, so callers strip
     * them afterwards. Compiled once because it is applied to every line.
     *
     * <p>The request-method class is plain {@code [A-Z]+}; the earlier
     * {@code [A-Z[/url]]+} was forum-markup residue that additionally accepted
     * the characters '/', 'u', 'r' and 'l'.
     */
    private static final Pattern LOG_LINE = Pattern.compile(
            "(?<ip>\\d+\\.\\d+\\.\\d+\\.\\d+)(?<datetime> - - \\[(.*?)])(?<t1>\\s[\\\\\"]+)(?<requestMethod>[A-Z]+)(?<t2> )(?<requestUrl>\\S+\\s)(?<protocol>\\S+\")(?<status> \\d+)(?<bytes> \\d+)"
            + "(?<referer> \"(.*?)\")(?<agent> \"(.*?)\")(?<forwarded> \"(.*?)\")(?<host> \"(.*?)\")"
            + "(?<requestTime> \"(.*?)\")(?<responseTime> \"(.*?)\")(?<connectTime> \"(.*?)\")(?<headerTime> \"(.*?)\")");

    /** Matches from the first '/' up to and including the first following '?'. */
    private static final Pattern URL_PATH = Pattern.compile("\\/(.*?)\\?");

    /**
     * Reads the log file, aggregates it with a fork/join task and prints timings.
     *
     * @param args optional; {@code args[0]} is the log file path, otherwise
     *             {@link #DEFAULT_LOG_FILE} is used
     */
    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();

        // Read the whole log file into memory, one entry per line.
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_LOG_FILE;
        ArrayList<String> logList = readFileByLines(fileName);
        System.out.println("log size: " + logList.size());
        long loadFileTime = System.currentTimeMillis();
        System.out.println("load file time: " + (loadFileTime - startTime));

        // Run the divide-and-conquer task on a dedicated pool of 5 workers.
        ForkJoinPool fjp = new ForkJoinPool(5);
        Map<String, RequestMsg> result;
        try {
            result = fjp.invoke(new Log(logList, 0, logList.size()));
        } finally {
            fjp.shutdown();
        }

        long endTime = System.currentTimeMillis();
        System.out.println("result size: " + result.size());
        System.out.println("forkjoin compute_time: " + (endTime - loadFileTime));
        System.out.println("total time: " + (endTime - startTime));
    }

    /**
     * Divide-and-conquer task over the half-open line range {@code [start, end)}
     * of the log list; yields the statistics of that range keyed by request URL.
     */
    static class Log extends RecursiveTask<Map<String, RequestMsg>> {
        private final int start;
        private final int end;
        private final ArrayList<String> logList;

        Log(ArrayList<String> logList, int start, int end) {
            this.logList = logList;
            this.start = start;
            this.end = end;
        }

        @Override
        protected Map<String, RequestMsg> compute() {
            int size = end - start;
            // An empty range (e.g. an empty log file) has nothing to parse; without
            // this guard the range could never shrink to 1 and would recurse forever.
            if (size <= 0) {
                return new HashMap<>();
            }
            // Base case: a single line cannot be split any further.
            if (size == 1) {
                return calc(logList.get(start));
            }
            int mid = (start + end) >>> 1;
            Log left = new Log(logList, start, mid);
            Log right = new Log(logList, mid, end);
            // Fork one half and compute the other in this thread, so the current
            // worker does real work instead of only dispatching sub-tasks.
            left.fork();
            return merge(right.compute(), left.join());
        }
    }

    /**
     * Merges two per-URL statistic maps into a new map.
     *
     * <p>For a URL present in both maps, the {@link RequestMsg} object taken from
     * {@code requestMap1} is updated in place: visit counts are summed and, for
     * the successful requests, the averages are combined weighted by their counts
     * (rounded to 3 decimals) and the min/max times are combined as well.
     *
     * @param requestMap1 first partial result; its values may be modified
     * @param requestMap2 second partial result
     * @return a new map holding the merged statistics
     */
    public static Map<String, RequestMsg> merge(Map<String,RequestMsg> requestMap1, Map<String,RequestMsg> requestMap2){
        Map<String, RequestMsg> result = new HashMap<>(requestMap1);
        requestMap2.forEach((k, v) -> {
            RequestMsg rm = result.get(k);
            if (rm == null) {
                result.put(k, v);
                return;
            }
            mergeSuccessStats(rm, v);
            rm.setVisit_times(rm.getVisit_times() + v.getVisit_times());
        });
        return result;
    }

    /** Folds the success-only statistics (count, min, max, average) of {@code other} into {@code target}. */
    private static void mergeSuccessStats(RequestMsg target, RequestMsg other) {
        int targetOk = target.getSucceed_visit_times();
        int otherOk = other.getSucceed_visit_times();
        if (otherOk == 0) {
            // Nothing successful on the other side: target already holds the answer.
            return;
        }
        if (targetOk == 0) {
            // Only the other side has successful requests: take its figures as they are.
            target.setMax_request_time(other.getMax_request_time());
            target.setMin_request_time(other.getMin_request_time());
            target.setMax_response_time(other.getMax_response_time());
            target.setMin_response_time(other.getMin_response_time());
            target.setAverage_request_time(other.getAverage_request_time());
            target.setAverage_response_time(other.getAverage_response_time());
            target.setSucceed_visit_times(otherOk);
            return;
        }
        target.setAverage_request_time(weightedAverage(
                target.getAverage_request_time(), targetOk, other.getAverage_request_time(), otherOk));
        target.setAverage_response_time(weightedAverage(
                target.getAverage_response_time(), targetOk, other.getAverage_response_time(), otherOk));
        // Min/max must be combined too, otherwise they would stay at whatever the
        // first merged record happened to contain.
        target.setMax_request_time(larger(target.getMax_request_time(), other.getMax_request_time()));
        target.setMin_request_time(smaller(target.getMin_request_time(), other.getMin_request_time()));
        target.setMax_response_time(larger(target.getMax_response_time(), other.getMax_response_time()));
        target.setMin_response_time(smaller(target.getMin_response_time(), other.getMin_response_time()));
        target.setSucceed_visit_times(targetOk + otherOk);
    }

    /** Combines two averages weighted by their sample counts, rounded half-up to 3 decimals. */
    private static Double weightedAverage(double avg1, int count1, double avg2, int count2) {
        BigDecimal total = BigDecimal.valueOf(avg1 * count1 + avg2 * count2);
        BigDecimal count = BigDecimal.valueOf(count1 + count2);
        return total.divide(count, 3, java.math.RoundingMode.HALF_UP).doubleValue();
    }

    /** Returns the larger of two values, treating {@code null} as "no value". */
    private static Double larger(Double a, Double b) {
        if (a == null) {
            return b;
        }
        if (b == null) {
            return a;
        }
        return Math.max(a, b);
    }

    /** Returns the smaller of two values, treating {@code null} as "no value". */
    private static Double smaller(Double a, Double b) {
        if (a == null) {
            return b;
        }
        if (b == null) {
            return a;
        }
        return Math.min(a, b);
    }

    /**
     * Parses a single log line into a one-entry statistics map.
     *
     * @param log one raw access-log line
     * @return a map with the parsed record keyed by its request URL, or an empty
     *         map when the line does not match the expected format
     */
    public static Map<String,RequestMsg> calc(String log){
        Map<String, RequestMsg> result = new HashMap<>();
        parseLine(log, result);
        return result;
    }

    /**
     * Reads a text file into a list, one element per line.
     *
     * <p>An {@link IOException} is reported on standard error and the lines read
     * so far (possibly none) are returned.
     *
     * @param fileName path of the file to read
     * @return the lines of the file in order
     */
    public static ArrayList<String> readFileByLines(String fileName) {
        ArrayList<String> lines = new ArrayList<String>();
        System.out.println("以行为单位读取文件内容,一次读一整行:");
        // try-with-resources closes the reader exactly once on every path.
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(fileName)))) {
            String current;
            while ((current = reader.readLine()) != null) {
                lines.add(current);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return lines;
    }

    /**
     * Parses one access-log line and stores the record in {@code url} under its
     * request URL (query string removed), replacing any existing entry.
     *
     * @param str one raw access-log line
     * @param url map receiving the parsed record; left untouched when the line
     *            does not match the expected format
     * @return the parsed record, or an empty {@link RequestMsg} when the line
     *         does not match
     * @throws NumberFormatException if a numeric field cannot be parsed
     */
    public static RequestMsg parseLine(String str,Map<String, RequestMsg> url){
        RequestMsg requestMsg = new RequestMsg();
        Matcher m = LOG_LINE.matcher(str);
        if (!m.find()) {
            return requestMsg;
        }

        String datetime = m.group("datetime").replaceAll(" - - |\\[|\\]", "");
        String requestUrl = normalizeUrl(m.group("requestUrl"));
        String protocol = m.group("protocol").replaceAll("\"", "");
        // Numeric groups carry the blank that separates them from the previous field.
        String status = m.group("status").substring(1);
        String bytes = m.group("bytes").substring(1);
        String referer = m.group("referer").substring(1).replaceAll("\"", "");
        double requestTime = parseSeconds(m.group("requestTime"));
        double responseTime = parseSeconds(m.group("responseTime"));

        // Raw data of this single request.
        requestMsg.setVisit_times(1);
        requestMsg.setRemote_addr(m.group("ip"));
        requestMsg.setTime_local(datetime);
        requestMsg.setRequestMethod(m.group("requestMethod"));
        requestMsg.setRequestUrl(requestUrl);
        requestMsg.setProtocol(protocol);
        requestMsg.setStatus(Integer.parseInt(status));
        requestMsg.setBytes(Integer.parseInt(bytes));
        requestMsg.setHttp_referer(referer);
        requestMsg.setRequest_time(requestTime);
        requestMsg.setResponse_time(responseTime);
        requestMsg.setConnect_time(parseSeconds(m.group("connectTime")));
        requestMsg.setHeader_time(parseSeconds(m.group("headerTime")));

        // Aggregates only take successful (HTTP 200) requests into account.
        if (status.equals("200")) {
            requestMsg.setMax_request_time(requestTime);
            requestMsg.setMin_request_time(requestTime);
            requestMsg.setMax_response_time(responseTime);
            requestMsg.setMin_response_time(responseTime);
            requestMsg.setAverage_request_time(requestTime);
            requestMsg.setAverage_response_time(responseTime);
            requestMsg.setSucceed_visit_times(1);
        } else {
            requestMsg.setAverage_request_time(0.0);
            requestMsg.setAverage_response_time(0.0);
            requestMsg.setSucceed_visit_times(0);
        }
        url.put(requestUrl, requestMsg);
        return requestMsg;
    }

    /** Reduces the captured request target to the key used for grouping (query string removed). */
    private static String normalizeUrl(String rawUrl) {
        String path = rawUrl;
        Matcher query = URL_PATH.matcher(rawUrl);
        // Only the first match is the path; later "/...?" matches lie inside the
        // query string and must not replace it.
        if (query.find()) {
            path = query.group().replaceAll(" |\\?", "");
        }
        path = path.replaceAll(" ", "");
        if (path.length() > 255) {
            path = path.substring(0, 250);
        }
        int comma = path.indexOf(",");
        return comma != -1 ? path.substring(comma + 1) : path;
    }

    /**
     * Converts a captured, quoted time field to seconds. nginx separates the
     * times of several upstream attempts with commas and colons; the last value
     * is used. "-" (no value) and an empty field yield 0.
     */
    private static double parseSeconds(String rawField) {
        String value = rawField.replaceAll("\"", "").trim();
        int lastSeparator = Math.max(value.lastIndexOf(','), value.lastIndexOf(':'));
        if (lastSeparator != -1) {
            value = value.substring(lastSeparator + 1).trim();
        }
        if (value.isEmpty() || value.equals("-")) {
            return 0.0;
        }
        return Double.parseDouble(value);
    }
}
/**
 * One access-log record and, after merging, the aggregated statistics of all
 * records that share the same request URL.
 *
 * <p>Plain mutable bean: every field starts as {@code null} and is exposed
 * through a getter/setter pair.
 */
public class RequestMsg {
    // ---- raw fields of a single request ----
    private Integer visit_times;           // number of requests
    private String remote_addr;            // client IP, e.g. 120.92.182.49
    private String time_local;             // e.g. 11/Jun/2020:00:00:51 +0800
    private String requestMethod;          // GET, POST, ...
    private String requestUrl;             // e.g. /aggr/robot.php
    private String protocol;               // e.g. HTTP/1.0
    private Integer status;                // HTTP status code, e.g. 200
    private Integer bytes;                 // e.g. 1550
    private String http_referer;           // URL the request came from
    private Double request_time;           // total request processing time in seconds, e.g. 0.340
    private Double response_time;          // server-side response time in seconds, e.g. 0.340
    private Double connect_time;           // seconds, e.g. 0.000
    private Double header_time;            // seconds, e.g. 0.340
    private Double log_date;               // date of the log, e.g. 20200101

    // ---- aggregates over the requests answered with status 200 ----
    private Double max_request_time;       // largest request time
    private Double max_response_time;      // largest server response time
    private Double min_request_time;       // smallest request time
    private Double min_response_time;      // smallest server response time
    private Double average_request_time;   // average request time
    private Double average_response_time;  // average server response time
    private Integer succeed_visit_times;   // number of requests with status 200

    /**
     * Lists every field as {@code name=value} pairs separated by "; " inside
     * {@code RequestMsg = {...}}.
     */
    @Override
    public String toString() {
        return "RequestMsg = {visit_times=" + visit_times
                + "; remote_addr=" + remote_addr
                + "; time_local=" + time_local
                + "; requestMethod=" + requestMethod
                + "; requestUrl=" + requestUrl
                + "; protocol=" + protocol
                + "; status=" + status
                + "; bytes=" + bytes
                + "; http_referer=" + http_referer
                + "; request_time=" + request_time
                + "; response_time=" + response_time
                + "; connect_time=" + connect_time
                + "; header_time=" + header_time
                + "; log_date=" + log_date
                + "; max_request_time=" + max_request_time
                + "; max_response_time=" + max_response_time
                + "; min_request_time=" + min_request_time
                + "; min_response_time=" + min_response_time
                + "; average_request_time=" + average_request_time
                + "; average_response_time=" + average_response_time
                + "; succeed_visit_times=" + succeed_visit_times
                + "}"; // closes the brace opened after "RequestMsg = "
    }

    public Double getLog_date() {
        return log_date;
    }

    public void setLog_date(Double log_date) {
        this.log_date = log_date;
    }

    public Double getMax_request_time() {
        return max_request_time;
    }

    public void setMax_request_time(Double max_request_time) {
        this.max_request_time = max_request_time;
    }

    public Double getMax_response_time() {
        return max_response_time;
    }

    public void setMax_response_time(Double max_response_time) {
        this.max_response_time = max_response_time;
    }

    public Double getMin_request_time() {
        return min_request_time;
    }

    public void setMin_request_time(Double min_request_time) {
        this.min_request_time = min_request_time;
    }

    public Double getMin_response_time() {
        return min_response_time;
    }

    public void setMin_response_time(Double min_response_time) {
        this.min_response_time = min_response_time;
    }

    public Integer getVisit_times() {
        return visit_times;
    }

    public void setVisit_times(Integer visit_times) {
        this.visit_times = visit_times;
    }

    public String getRemote_addr() {
        return remote_addr;
    }

    public void setRemote_addr(String remote_addr) {
        this.remote_addr = remote_addr;
    }

    public String getTime_local() {
        return time_local;
    }

    public void setTime_local(String time_local) {
        this.time_local = time_local;
    }

    public String getRequestMethod() {
        return requestMethod;
    }

    public void setRequestMethod(String requestMethod) {
        this.requestMethod = requestMethod;
    }

    public String getRequestUrl() {
        return requestUrl;
    }

    public void setRequestUrl(String requestUrl) {
        this.requestUrl = requestUrl;
    }

    public String getProtocol() {
        return protocol;
    }

    public void setProtocol(String protocol) {
        this.protocol = protocol;
    }

    public Integer getStatus() {
        return status;
    }

    public void setStatus(Integer status) {
        this.status = status;
    }

    public Integer getBytes() {
        return bytes;
    }

    public void setBytes(Integer bytes) {
        this.bytes = bytes;
    }

    public String getHttp_referer() {
        return http_referer;
    }

    public void setHttp_referer(String http_referer) {
        this.http_referer = http_referer;
    }

    public Double getRequest_time() {
        return request_time;
    }

    public void setRequest_time(Double request_time) {
        this.request_time = request_time;
    }

    public Double getResponse_time() {
        return response_time;
    }

    public void setResponse_time(Double response_time) {
        this.response_time = response_time;
    }

    public Double getConnect_time() {
        return connect_time;
    }

    public void setConnect_time(Double connect_time) {
        this.connect_time = connect_time;
    }

    public Double getHeader_time() {
        return header_time;
    }

    public void setHeader_time(Double header_time) {
        this.header_time = header_time;
    }

    public Double getAverage_request_time() {
        return average_request_time;
    }

    public void setAverage_request_time(Double average_request_time) {
        this.average_request_time = average_request_time;
    }

    public Double getAverage_response_time() {
        return average_response_time;
    }

    public void setAverage_response_time(Double average_response_time) {
        this.average_response_time = average_response_time;
    }

    public Integer getSucceed_visit_times() {
        return succeed_visit_times;
    }

    public void setSucceed_visit_times(Integer succeed_visit_times) {
        this.succeed_visit_times = succeed_visit_times;
    }
}
参考链接:
nginx日志解析:java正则解析 - about云VIP小组群 - About云开发 - Powered by Discuz!
更多推荐
所有评论(0)