T1593.002 Search Engines Detection — KQL & SPL Queries for Sentinel and Splunk

Microsoft Sentinel / Defender

kusto

// T1593.002 — search-engine dorking as seen in IIS logs (W3CIISLog).
// Two branches are unioned into one result set with identical columns:
//   referer_dork          - the request's Referer is a search-engine results page whose
//                           search text contains a dork operator or a sensitive term
//   sensitive_path_access - the request targets a path commonly surfaced through dorking
let Lookback = 24h;
let DorkPatterns = dynamic(["filetype:", "ext:", "inurl:", "intitle:", "intext:", "site:", "cache:", "allintitle:", "allinurl:"]);
let SensitiveTerms = dynamic(["password", "passwd", "credential", "api_key", "apikey", "secret", "token", "config", "backup", ".env", "admin", "vpn", "portal", "jira", "confluence", "database"]);
let SearchEngineReferrers = dynamic(["google.com/search", "bing.com/search", "search.yahoo.com", "duckduckgo.com", "yandex.com/search"]);
let RefererDorks = W3CIISLog
| where TimeGenerated > ago(Lookback)
| where isnotempty(csReferer) and csReferer has_any (SearchEngineReferrers)
// Google, Bing and DuckDuckGo carry the search text in q=, Yahoo in p=, Yandex in text=.
// Matching only q= silently dropped every Yahoo and Yandex referrer listed above.
| extend SearchQuery = url_decode(extract(@"[?&](?:q|p|text)=([^&]+)", 1, csReferer))
| where isnotempty(SearchQuery)
// Evaluate each indicator once and reuse the flags for filtering.
| extend HasDorkOperator = SearchQuery has_any (DorkPatterns)
| extend HasSensitiveTerm = SearchQuery has_any (SensitiveTerms)
| where HasDorkOperator or HasSensitiveTerm
| extend DetectionBranch = "referer_dork"
| project TimeGenerated, cIP, csUsername, csMethod, csUriStem, csUriQuery, SearchQuery, csReferer, HasDorkOperator, HasSensitiveTerm, scStatus, DetectionBranch;
let SensitivePathAccess = W3CIISLog
| where TimeGenerated > ago(Lookback)
| where csUriStem has_any (".env", ".git/config", ".git/HEAD", "wp-config.php", "web.config", "config.php", ".htpasswd", "/backup", "database.sql", "/credentials", "/.aws/credentials", "/.ssh/id_rsa", "phpinfo.php", "/server-status", "/elmah.axd", "/.DS_Store")
// Placeholder values keep this branch's schema identical to RefererDorks for the union.
| extend HasDorkOperator = false
| extend HasSensitiveTerm = false
| extend SearchQuery = ""
| extend DetectionBranch = "sensitive_path_access"
| project TimeGenerated, cIP, csUsername, csMethod, csUriStem, csUriQuery, SearchQuery, csReferer, HasDorkOperator, HasSensitiveTerm, scStatus, DetectionBranch;
RefererDorks
| union SensitivePathAccess
| sort by TimeGenerated desc

medium severity low confidence

Data Sources

Application Log: Application Log Content; Network Traffic: Network Traffic Content; IIS Web Server Logs (W3CIISLog)

Required Tables

W3CIISLog

False Positives

Legitimate users reaching public web content via normal search engine queries that happen to contain sensitive keywords (e.g., searching for 'admin portal login guide' and landing on your documentation)
Security researchers and authorized penetration testers performing scheduled reconnaissance assessments against your domains
Search engine crawlers (Googlebot, Bingbot, DuckDuckBot) probing robots.txt, sitemap.xml, and other indexed paths as part of normal site indexing
Automated vulnerability scanners (Qualys, Nessus, Burp Suite Enterprise) probing for sensitive file paths during authorized scheduled scans
Web monitoring and uptime services that access known paths for availability checks, potentially triggering the sensitive path branch

Splunk

spl

index=web (sourcetype=iis OR sourcetype=access_combined OR sourcetype="apache:access")
``` T1593.002: flag requests that arrive from a search-engine results page with a dork-like query, or that target commonly dorked sensitive paths. ```
``` Normalise field names: the IIS sourcetype uses cs_*/sc_* names while access_combined uses uri_path/referer/clientip/status. Without this, Apache events evaluate to null everywhere and can never match. Exact IIS referer field casing depends on the add-on in use, so both spellings are tried. ```
| eval cs_uri_stem=coalesce(cs_uri_stem, uri_path, uri)
| eval cs_referer=coalesce(cs_referer, cs_Referer, referer)
| eval c_ip=coalesce(c_ip, clientip)
| eval cs_username=coalesce(cs_username, user)
| eval cs_method=coalesce(cs_method, method)
| eval cs_uri_query=coalesce(cs_uri_query, uri_query)
| eval sc_status=coalesce(sc_status, status)
| eval uri_lower=lower(cs_uri_stem)
| eval referer_lower=lower(cs_referer)
| eval is_search_referrer=if(match(referer_lower, "(google\.com/search|bing\.com/search|search\.yahoo\.com|duckduckgo\.com|yandex\.com/search)"), 1, 0)
| eval is_sensitive_path=if(match(uri_lower, "(\.env|\.git/config|\.git/head|wp-config\.php|web\.config|config\.php|\.htpasswd|/backup|database\.sql|/credentials|\.aws/credentials|\.ssh/id_rsa|phpinfo\.php|/server-status|/elmah\.axd|\.ds_store)"), 1, 0)
| where is_search_referrer=1 OR is_sensitive_path=1
``` Google, Bing and DuckDuckGo use q=, Yahoo uses p=, Yandex uses text= for the search text. ```
| rex field=cs_referer "[?&](?:q|p|text)=(?<raw_query>[^&]+)"
| eval search_query=urldecode(raw_query)
| eval query_lower=lower(coalesce(search_query, ""))
| eval has_dork_operator=if(match(query_lower, "(filetype:|ext:|inurl:|intitle:|intext:|site:|cache:|allintitle:|allinurl:)"), 1, 0)
| eval has_sensitive_term=if(match(query_lower, "(password|passwd|credential|api_key|apikey|secret|token|config|backup|\.env|admin|vpn|portal|jira|confluence|database)"), 1, 0)
| eval detection_branch=case(
    is_search_referrer=1 AND (has_dork_operator=1 OR has_sensitive_term=1), "referer_dork",
    is_sensitive_path=1, "sensitive_path_access",
    1=1, "other")
| where detection_branch != "other"
``` Score 3: dork operator plus sensitive term, or a sensitive path served with HTTP 200. Score 2: any other referer dork. Score 1: sensitive path probe that was not served. ```
| eval risk_score=case(
    detection_branch="referer_dork" AND has_dork_operator=1 AND has_sensitive_term=1, 3,
    detection_branch="referer_dork" AND (has_dork_operator=1 OR has_sensitive_term=1), 2,
    detection_branch="sensitive_path_access" AND sc_status=200, 3,
    detection_branch="sensitive_path_access", 1,
    1=1, 1)
| table _time, c_ip, cs_username, cs_method, cs_uri_stem, cs_uri_query, search_query, has_dork_operator, has_sensitive_term, sc_status, detection_branch, risk_score
| sort - risk_score, - _time

medium severity low confidence

Data Sources

Application Log: Application Log Content; Network Traffic: Network Traffic Content; IIS/Apache Web Server Access Logs

Required Sourcetypes

iis, access_combined, apache:access

False Positives

Legitimate users landing on public web resources via search queries that happen to contain sensitive keywords in the search terms
Authorized penetration testers and security researchers performing scheduled assessments against your web infrastructure
Search engine crawlers (Googlebot, Bingbot) probing known site paths including robots.txt-disallowed directories that may contain sensitive path patterns
Automated vulnerability scanners performing authorized scans that enumerate common sensitive paths as part of their check list
Web content delivery monitoring tools that periodically probe known application paths for availability and performance checks

Elastic Security (EQL)

eql

// T1593.002 — Search Engine Dorking
// Matches web access events that either (a) carry a search-engine referrer whose query
// contains a dork operator, or (b) request a path commonly exposed through dorking.
// The outer parentheses matter: `and` binds tighter than `or`, so without them the
// url.path branch would apply to every event regardless of event.dataset.
// NOTE(review): "apache.access" is added alongside the original dataset name on the
// assumption that the stock Elastic Apache integration is in use — confirm for your deployment.
any where event.dataset : ("iis.access", "apache_http_server.access", "apache.access")
  and (
    (
      http.request.referrer : (
        "*google.com/search*", "*bing.com/search*", "*duckduckgo.com*"
      ) and http.request.referrer : (
        // Colons may be logged percent-encoded or literal, so both forms are listed.
        "*filetype%3A*", "*inurl%3A*", "*intitle%3A*", "*site%3A*",
        "*filetype:*", "*inurl:*", "*intitle:*", "*site:*"
      )
    )
    or url.path : ("/.env", "/.git/config", "/wp-config.php", "/.htpasswd",
      "/backup*", "/.aws/credentials", "/phpinfo.php")
  )

medium severity low confidence

Data Sources

Web Server Logs IIS Logs

Required Tables

logs-apache_http_server.* logs-iis.access-*

False Positives

Legitimate users reaching public web content via normal search engine queries that happen to contain sensitive keywords (e.g., searching for 'admin portal login guide' and landing on your documentation)
Security researchers and authorized penetration testers performing scheduled reconnaissance assessments against your domains
Search engine crawlers (Googlebot, Bingbot, DuckDuckBot) probing robots.txt, sitemap.xml, and other indexed paths as part of normal site indexing
Automated vulnerability scanners (Qualys, Nessus, Burp Suite Enterprise) probing for sensitive file paths during authorized scheduled scans

IBM QRadar (AQL)

sql

/*
 * T1593.002 - search-engine dorking seen in web events over the last 24 hours.
 * An event qualifies when its referer is a Google results URL containing an encoded
 * filetype:/inurl: operator, or when the requested URL contains .env or wp-config.
 * RiskScore: 8 for a dork referer, 4 for a sensitive-path request without one.
 * Previously the CASE repeated the WHERE filter verbatim, so every row scored 8 and
 * the ELSE branch was unreachable.
 * ILIKE is already case-insensitive, so the operands are not wrapped in LOWER().
 * NOTE(review): "referer" and "requesturl" are assumed to be custom event properties
 * defined in this deployment - confirm their names.
 */
SELECT
    DATEFORMAT(devicetime, 'yyyy-MM-dd HH:mm:ss') AS "EventTime",
    LOGSOURCENAME(logsourceid) AS "LogSource",
    LOGSOURCETYPENAME(devicetype) AS "LogSourceType",
    "username", "sourceip", "destinationip",
    "eventid", "deviceaction", "message",
    CASE
        WHEN "referer" ILIKE '%google.com/search%' AND ("referer" ILIKE '%filetype%3a%' OR "referer" ILIKE '%inurl%3a%') THEN 8
        ELSE 4
      END AS "RiskScore"
  FROM events
  WHERE (
        ("referer" ILIKE '%google.com/search%' AND ("referer" ILIKE '%filetype%3a%' OR "referer" ILIKE '%inurl%3a%'))
        OR "requesturl" ILIKE '%.env%'
        OR "requesturl" ILIKE '%wp-config%'
    )
    AND LOGSOURCETYPENAME(devicetype) NOT IN ('SIM Audit', 'Custom Rule Engine')
  ORDER BY "RiskScore" DESC, "EventTime" DESC
  LAST 24 HOURS

medium severity low confidence

Data Sources

QRadar SIEM Windows Security Events Network Firewall Logs Syslog

Required Tables

events

False Positives

Legitimate users reaching public web content via normal search engine queries that happen to contain sensitive keywords (e.g., searching for 'admin portal login guide' and landing on your documentation)
Security researchers and authorized penetration testers performing scheduled reconnaissance assessments against your domains
Search engine crawlers (Googlebot, Bingbot, DuckDuckBot) probing robots.txt, sitemap.xml, and other indexed paths as part of normal site indexing
Automated vulnerability scanners (Qualys, Nessus, Burp Suite Enterprise) probing for sensitive file paths during authorized scheduled scans

Sumo Logic CSE

sql

// T1593.002: count requests per client and URI for paths commonly exposed via dorking.
// Without the path filter below, the query counted every web request and carried no
// detection signal. Output columns are unchanged: client_ip, uri, _count.
(_sourceCategory=*web* OR _sourceCategory=*iis* OR _sourceCategory=*apache*)
| parse regex "(?<client_ip>\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}) - (?<user>[^ ]+) .* \"(?<method>[A-Z]+) (?<uri>[^ ]+).*\" (?<status>\\d+)"
// Compare case-insensitively by lowering the URI once.
| toLowerCase(uri) as uri_lower
| where uri_lower matches "*.env*"
    or uri_lower matches "*.git/config*"
    or uri_lower matches "*.git/head*"
    or uri_lower matches "*wp-config.php*"
    or uri_lower matches "*web.config*"
    or uri_lower matches "*config.php*"
    or uri_lower matches "*.htpasswd*"
    or uri_lower matches "*/backup*"
    or uri_lower matches "*database.sql*"
    or uri_lower matches "*/credentials*"
    or uri_lower matches "*.ssh/id_rsa*"
    or uri_lower matches "*phpinfo.php*"
    or uri_lower matches "*/server-status*"
    or uri_lower matches "*/elmah.axd*"
    or uri_lower matches "*.ds_store*"
| count by client_ip, uri
| sort by _count desc

medium severity low confidence

Data Sources

Sumo Logic Cloud SIEM Log Sources via Sumo Logic Collector

Required Tables

web/access iis/access

False Positives

Legitimate users reaching public web content via normal search engine queries that happen to contain sensitive keywords (e.g., searching for 'admin portal login guide' and landing on your documentation)
Security researchers and authorized penetration testers performing scheduled reconnaissance assessments against your domains
Search engine crawlers (Googlebot, Bingbot, DuckDuckBot) probing robots.txt, sitemap.xml, and other indexed paths as part of normal site indexing
Automated vulnerability scanners (Qualys, Nessus, Burp Suite Enterprise) probing for sensitive file paths during authorized scheduled scans

Google Chronicle / SecOps

yaral

// Detects HTTP events that indicate search-engine dorking (T1593.002):
// either the referral URL is a search-engine results page whose query carries a dork
// operator, or the requested URL is a path commonly exposed through dorking.
// The earlier version matched every NETWORK_HTTP event that had a principal IP.
rule t1593_002_search_engines {
  meta:
    author = "df00tech"
    description = "Detects Search Engines (T1593.002)"
    mitre_attack_tactic = "TA0043"
    mitre_attack_technique = "T1593.002"
    confidence = "low"
    severity = "medium"
  events:
    $e.metadata.event_type = "NETWORK_HTTP"
    $e.principal.ip != ""
    (
      (
        // Referrer is a search results page ...
        re.regex($e.network.http.referral_url, `(google\.com/search|bing\.com/search|search\.yahoo\.com|duckduckgo\.com|yandex\.com/search)`) nocase and
        // ... and its query contains a dork operator (colon literal or percent-encoded).
        re.regex($e.network.http.referral_url, `(filetype|ext|inurl|intitle|intext|site|cache|allintitle|allinurl)(:|%3a)`) nocase
      ) or
      // Direct request for a commonly dorked sensitive path.
      re.regex($e.target.url, `(/\.env|/\.git/config|/\.git/head|/wp-config\.php|/\.htpasswd|/backup|/\.aws/credentials|/\.ssh/id_rsa|/phpinfo\.php|/server-status|/elmah\.axd)`) nocase
    )
  condition:
    $e
}

medium severity low confidence

Data Sources

Google Chronicle SIEM Chronicle UDM

Required Tables

NETWORK_HTTP NETWORK_CONNECTION

False Positives

Legitimate users reaching public web content via normal search engine queries that happen to contain sensitive keywords (e.g., searching for 'admin portal login guide' and landing on your documentation)
Security researchers and authorized penetration testers performing scheduled reconnaissance assessments against your domains
Search engine crawlers (Googlebot, Bingbot, DuckDuckBot) probing robots.txt, sitemap.xml, and other indexed paths as part of normal site indexing
Automated vulnerability scanners (Qualys, Nessus, Burp Suite Enterprise) probing for sensitive file paths during authorized scheduled scans

CrowdStrike LogScale (CQL)

cql

// T1593.002: scripted search-engine queries launched from a monitored host.
// Start from process executions of common scripting/transfer tools, then keep only
// those whose command line references a search-engine results URL or a dork operator.
// Without the CommandLine filter, every python/curl/wget/nmap/masscan run was labelled
// as search-engine reconnaissance.
#event_simpleName = "ProcessRollup2"
| ImageFileName = /python|curl|wget|nmap|masscan/i
| CommandLine = /google\.[a-z.]+\/search|bing\.com\/search|search\.yahoo\.com|duckduckgo\.com|yandex\.[a-z.]+\/search|(filetype|inurl|intitle|intext|allinurl|allintitle)(:|%3a)/i
| TechniqueLabel := "T1593.002 - Reconnaissance"
| table([@timestamp, ComputerName, UserName, ImageFileName, CommandLine, TechniqueLabel])

medium severity low confidence

Data Sources

CrowdStrike Falcon CrowdStrike LogScale

Required Tables

HttpRequest ProcessRollup2

False Positives

Legitimate users reaching public web content via normal search engine queries that happen to contain sensitive keywords (e.g., searching for 'admin portal login guide' and landing on your documentation)
Security researchers and authorized penetration testers performing scheduled reconnaissance assessments against your domains
Search engine crawlers (Googlebot, Bingbot, DuckDuckBot) probing robots.txt, sitemap.xml, and other indexed paths as part of normal site indexing
Automated vulnerability scanners (Qualys, Nessus, Burp Suite Enterprise) probing for sensitive file paths during authorized scheduled scans

Search Engines

Data Sources

Required Tables

False Positives

Data Sources

Required Sourcetypes

False Positives

Data Sources

Required Tables

False Positives

Data Sources

Required Tables

False Positives

Data Sources

Required Tables

False Positives

Data Sources

Required Tables

False Positives

Data Sources

Required Tables

False Positives

Response Playbook

Investigation Guide

Atomic Red Team Tests

Unlock Pro Content

Related Detections

Parent Technique

Related Sub-techniques

Same Tactic: Reconnaissance

Popular Detections