В этой статье я приведу пример скрипта, который я использовал для парсинга логов своих сайтов с хостинга.

Вот пример нескольких строк из лога access.log:

qliksense.ivan-shamaev.ru 95.55.137.247 - - [06/Jan/2021:00:01:11 +0300] "GET /feed/ HTTP/1.1" 304 - "-" "FeedDemon/4.5 (http://www.feeddemon.com/; Microsoft Windows)" 242702 149999:66666
qliksense.ivan-shamaev.ru 213.87.161.129 - - [06/Jan/2021:00:01:11 +0300] "GET /sas-cpm-activity-based-costing-and-activity-based-budgeting/ HTTP/1.1" 200 291311 "https://qliksense.ivan-shamaev.ru/" "Mozilla/5.0 (Linux; Android 9; Redmi Note 8 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36" 530205 336666:109999
qliksense.ivan-shamaev.ru 109.254.191.1 - - [06/Jan/2021:00:02:01 +0300] "POST /wp-admin/admin-ajax.php HTTP/1.1" 200 2 "https://qliksense.ivan-shamaev.ru/doing-swot-analysis/" "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 OPR/73.0.3856.284" 274095 149999:83333

Вот пример строк из error.log:

[Wed Jan 06 17:56:45 2021] [error] [client 176.31.246.76:23467] AH01797: client denied by server configuration: /homeenv/useradmin/qliksense.ivan-shamaev.ru/wp-login.php, referer http://qliksense.ivan-shamaev.ru/

Логи лежат в нескольких директориях. Поэтому я использую inline-таблицу для настройки скрипта, откуда грузить логи. Итак, вот код парсинга, в котором я вытаскиваю нужные мне параметры:

// Directories (one per web site) whose log files are loaded by the loops below
DirList:
LOAD * INLINE [
    DirectoryName
    ivan-shamaev.ru
    qliksense.ivan-shamaev.ru
];

// access.log: walk every directory listed in DirList and read each matching
// file as raw text lines into [TEMP access.log]
For iRow = 0 to NoOfRows('DirList') - 1

    // Directory name held in the current DirList row
    LET vDirectoryName = Peek('DirectoryName', iRow, 'DirList');

    For Each vFile in FileList('lib://WebSitesLogs/$(vDirectoryName)/*.access.log*')

        Trace >>> LOAD FILE: $(vFile);

        // Each record is kept whole in a single field; DISTINCT drops identical lines
        [TEMP access.log]:
        LOAD DISTINCT
            [@1:n]      As [Initial Access Log Text]
        FROM [$(vFile)]
        (fix, codepage is 28591, no labels);

        Trace --------------------------;

    Next vFile

Next iRow;

//==========================================

// Wildcard pattern lists, one variable per request category, written as
// quoted comma-separated literals.
// NOTE(review): none of these variables is expanded anywhere in the visible
// script - the WildMatch call in the [AL] load spells out its own patterns.
// Confirm whether they are used elsewhere before relying on them.
SET vSubStringSettingsBot = '''*bot*''';
SET vSubStringSettingsWordPress = '''*wordpress*''';
SET vSubStringSettingsPython = '''*python*''';
SET vSubStringSettingsSocialNetAndSearchers = '''*vk.com*'',''*facebook.com*'',''*mail.ru*''';
// NOTE(review): this list carries a second pattern ('nuclei*') that the
// hard-coded WildMatch in the [AL] load does not have.
SET vSubStringSettingsNuclei = '''*nuclei*'',''nuclei*''';


// Build the [AL] (access log) table from the raw lines in [TEMP access.log].
// The three LOADs form a preceding-load chain: the bottom one reads the
// resident table, and each LOAD above it works on the output of the one below.
// QUALIFY prefixes every resulting field name with the table name.
QUALIFY *;

// Stage 3: row id, request method / URL / response code, and a category
// derived from substrings found in [Request Details].
[AL]:
LOAD
    RowNo()                                                     As RowID,
    *,
    SubField([ResponseText], ' ', 1)                            As [Response Code],
    SubField([Type of Request Full Text], ' ', 1)               As [Type of Request],
    SubField([Type of Request Full Text], ' ', 2)               As [URL Request],
    // WildMatch yields the 1-based position of the first matching pattern and
    // 0 when nothing matches; adding 1 makes the first Pick label the no-match
    // value, so labels must stay aligned one-to-one with the patterns.
    Pick(
        RangeSum(
            WildMatch(
                Lower([Request Details]),
                '*bot*',
                '*wordpress*',
                '*python*',
                '*vk.com*', '*facebook.com*', '*mail.ru*',
                '*nuclei*'
            ),
            1
        ),
        'Indefined',
        'Bot',
        'WordPress',
        'Python',
        'Social Network & Search Engines', 'Social Network & Search Engines', 'Social Network & Search Engines',
        'Nuclei'
    )                                                           As [Mannual Category of Request]
;
// Stage 2: date and time from the bracketed timestamp, then the remainder of
// the line cut on the '" ' delimiter with the double quotes removed.
// NOTE(review): Date# resolves 'MMM' through the MonthNames script variable -
// presumably English abbreviations are configured; confirm for other locales.
LOAD
    [Initial Access Log Text],
    [Web Site Name],
    [IP],
    [TimeStamp],
    Date(Floor(Date#(Left([TimeStamp], 11), 'DD/MMM/YYYY')))    As Date,
    Time(Time#(Trim(Mid([TimeStamp], 13, 8)), 'hh:mm:ss'))      As [Time],
    Trim(PurgeChar(SubField([Second Part Of Text], '" ', 1), '"'))  As [Type of Request Full Text],
    Trim(PurgeChar(SubField([Second Part Of Text], '" ', 2), '"'))  As [ResponseText],
    Trim(PurgeChar(SubField([Second Part Of Text], '" ', 3), '"'))  As [Request Details],
    Trim(PurgeChar(SubField([Second Part Of Text], '" ', 4), '"'))  As [Others 1]
;
// Stage 1: host (with 'www.' removed), client IP, the text between the first
// '[' and ']', and everything that follows the first ']'.
LOAD
    [Initial Access Log Text],
    Replace(SubField([Initial Access Log Text], ' ', 1), 'www.', '')    As [Web Site Name],
    SubField([Initial Access Log Text], ' ', 2)                         As [IP],
    TextBetween([Initial Access Log Text], '[', ']')                    As [TimeStamp],
    Trim(
        Mid([Initial Access Log Text], Index([Initial Access Log Text], ']') + 1)
    )                                                                   As [Second Part Of Text]
RESIDENT [TEMP access.log];

UNQUALIFY *;

// The raw staging table is no longer needed once [AL] is built
DROP TABLE [TEMP access.log];

// error.log: walk every directory listed in DirList and read each matching
// file as raw text lines, together with its file name, into [TEMP error.log]
For iRow = 0 to NoOfRows('DirList') - 1

    // Directory name held in the current DirList row
    LET vDirectoryName = Peek('DirectoryName', iRow, 'DirList');

    For Each vFile in FileList('lib://WebSitesLogs/$(vDirectoryName)/*.error.log*')

        Trace >>> LOAD FILE: $(vFile);

        // The file name is kept so the site can be derived from it later
        [TEMP error.log]:
        LOAD
            FileName()      As [File Name],
            [@1:n]          As [Initial Error Log Text]
        FROM [$(vFile)]
        (fix, codepage is 28591, no labels);

        Trace --------------------------;

    Next vFile

Next iRow;


// Build the [EL] (error log) table from the raw lines in [TEMP error.log].
// The three LOADs form a preceding-load chain: the bottom one reads the
// resident table, and each LOAD above it works on the output of the one below.
// QUALIFY prefixes every resulting field name with the table name.
QUALIFY *;

// Stage 3: row id, and the client address split into IP and port.
[EL]:
LOAD
    RowNo()                                 As RowID,
    *,
    SubField([IP Port], ':', 1)             As IP,
    SubField([IP Port], ':', 2)             As Port
;
// Stage 2: date and time from the timestamp text (laid out like
// 'Wed Jan 06 17:56:45 2021'), the client token, and the error code / summary.
// NOTE(review): Date# resolves 'MMM' through the MonthNames script variable -
// presumably English abbreviations are configured; confirm for other locales.
LOAD
    *,
    Date(Floor(Date#(Mid([TimeStamp Text], 5, 6) & ' ' & Right([TimeStamp Text], 4), 'MMM DD YYYY')))  As Date,
    Time(Time#(Mid([TimeStamp Text], 12, 8), 'hh:mm:ss'))               As Time,
    SubField([Client and IP], ' ', 1)                                   As Client,
    SubField([Client and IP], ' ', 2)                                   As [IP Port],
    // Trim is required here: the text after the first ':' starts with a
    // space, which would otherwise end up in the field value.
    Trim(SubField([Other Text], ':', 1))                                As [Type Error],
    Trim(SubField(SubField([Other Text], ':', 2), ' (', 1))             As [Type Details Error]
;
// Stage 1: cut the line on '] ' and strip the square brackets from each piece.
// [Web Site] is the file name up to, but not including, the character that
// precedes 'error.log'.
LOAD
    Left([File Name], Index([File Name], 'error.log') - 2)              As [Web Site],
    [Initial Error Log Text],
    PurgeChar(SubField([Initial Error Log Text], '] ', 1), '[]')        As [TimeStamp Text],
    PurgeChar(SubField([Initial Error Log Text], '] ', 2), '[]')        As [Type of message],
    PurgeChar(SubField([Initial Error Log Text], '] ', 3), '[]')        As [Client and IP],
    PurgeChar(SubField([Initial Error Log Text], '] ', 4), '[]')        As [Other Text]
RESIDENT [TEMP error.log];

UNQUALIFY *;

// The raw staging table is no longer needed once [EL] is built
DROP TABLE [TEMP error.log];

В приведённом выше коде есть пример сложного парсинга: определение даты из строки и разложение строки лога с помощью различных функций, таких как:

  • SubField
  • TextBetween
5 1 голос
Рейтинг статьи

Подписаться
Уведомление о
guest
0 комментариев
Встроенная Обратная Связь
Просмотр всех комментариев
0
Оставьте, пожалуйста, комментарий!x