原始log里面,时间是用文本形式存放的。一行一条时间记录和命令记录,这样,查询某个时间段的所有执行命令就有些不方便。弄了个脚本,爬到数据库中查
初步的想法是,如果要查3天的log,就爬3天的log进去,下次查询别的,清空后再爬新的进去。这样就不会有存储量的问题和查询不方便的问题。
注:仅适用于user 和host在正式的log开始前几行的情况,regxf函数写的比较挫,后面偷懒不去修改了,正常的做法应该要分开用条件判断下。
代码如下:
#!/usr/bin/python
# Filename : recordlog.py
# import re module for regx
import re
# import MySQLdb module to access mysql
import MySQLdb
import os
import sys
#define a list function for mysql connect execute and close
def init():
db_host='127.0.0.1'
db_port=3306
db_user='mydbtest'
db_passwd='mydbtest'
db_name='mydbtest'
try:
conn = MySQLdb.connect(host=db_host, port=db_port, user=db_user, passwd=db_passwd, db=db_name)
except MySQLDb.OperationalError,error:
print "Connect Mysql[%s %s/%s %s] DB Error:"%(db_host,db_user,db_passwd,db_name),error,"\n"
return None
return conn
def executeSql(sql,conn):
cursor = conn.cursor()
cursor.execute(sql)
def closeConnect(conn):
conn.close()
# define a function name regxf to search the string from log
# var0 is the string of log,
# var1 is the map of the dict for insert into the database,
# var2 is the map of host and user
# var3 is the name of the full file
def regxf(str,dict,dictcount,filename):
regx = re.compile(r'(\d+-\d+-\d+ \d+:\d+:\d+).*outline=(\d*).*rawpos=(\d*) (.*)')
p = regx.match(str)
# If it isn't the end of the log file
if p is not None:
#for i in range(1,len(p.groups())+1):
# print p.group(i)
# clean the dic for new line
dict.clear()
dict['time']=p.group(1)
dict['outline']=p.group(2)
dict['rawline']=p.group(3)
dict['common']=p.group(4)
return 'insert into shterm_log values(NULL,\''+MySQLdb.escape_string(dict['time'])+'\',\''+MySQLdb.escape_string(dictcount['user'])+'\',\''+MySQLdb.escape_string(dictcount['host'])+'\',\''+MySQLdb.escape_string(dict['common'])+'\',\''+filename+'\',\''+MySQLdb.escape_string(dict['outline'])+'\',\''+MySQLdb.escape_string(dict['rawline'])+'\')'
#if it's the last line of the file, we can log the finish time
elif p is None:
#p = re.search(r'(\d+-\d+-\d+ \d+:\d+:\d+).*',str)
p = re.match(r'(\d+-\d+-\d+ \d+:\d+:\d+) disconnect.*',str)
if p is not None:
# do not log the end time of the session now
#for i in range(1,len(p.groups())+1):
#print p.group(i)
return None
elif p is None:
p = re.match(r'user: (.*)',str)
if p is not None:
#print p.group(1)
dictcount['user'] = p.group(1)
return None
else:
# for host
p = re.match(r'to_ipaddr:(.*)',str)
if p is not None:
#print p.group(1)
dictcount['host'] = p.group(1)
return None
else:
return None
def insertFromLogfile(filename):
#init the connect to mysql
conn = init()
f = file(filename,'r');
dictattrib = dict()
dictcount = dict()
while True:
line = f.readline();
if len(line) == 0:
break
sql = regxf(line,dictattrib,dictcount,filename)
if sql is not None:
executeSql(sql,conn)
else:
continue
#close the file handle
f.close()
#close the mysql connect
closeConnect(conn)
args = sys.argv
top = args[1]
print "search log from "+top
for root,dirs, files in os.walk(top,topdown=False):
for name in files:
if len(os.path.splitext(name))>1 and len(os.path.splitext(name)[-1])>0:
ext = os.path.splitext(name)[-1]
if ext == '.log':
fullname = os.path.join(root,name)
insertFromLogfile(fullname)
else:
continue
else:
continue
#1