Rsyslog - Parse Json and enrich IP with Geolocation using Maxmind GeoLite2 City and ISP
Today we wanted to parse some JSON logs that we had in a file using Rsyslog and enrich them with geolocation information about the city and the ISP an IP address belongs to. We initially tried Logstash (see the relevant previous blog post), but it was too slow, so we decided to try parsing with Rsyslog instead. The file, let’s call it /opt/input-geo.json
had the following structure and content. (It is the same as in Logstash post):
{"name":"Christos","src_ip":"63.145.248.101","age":12}
{"name":"Nikos","src_ip":"98.158.156.175","age":10}
Rsyslog has a MaxMind/GeoIP DB lookup (mmdblookup) module, which adds information about the geographical location of IP addresses, based on data from the MaxMind GeoLite2 databases. In our case we used:
- GeoLite2 City database (free)
- GeoIP2 ISP Database (commercial licence)
We wanted to parse the JSON file, enrich the src_ip
field and store the geolocation information in src_geoip
. Then we forward the message to Elasticsearch using omelasticsearch, the Elasticsearch Output Module. For debugging purposes we also enabled file output. Thus, the configuration (rsyslog.conf
) looked like the following:
# /etc/rsyslog.conf Configuration file for rsyslog.
#
# For more information see
# /usr/share/doc/rsyslog-doc/html/rsyslog_conf.html
#
# Default logging rules can be found in /etc/rsyslog.d/50-default.conf
# NOTE(review): the $IncludeConfig line for /etc/rsyslog.d/*.conf further down
# is commented out, so those default rules are not loaded by this file.
#################
#### MODULES ####
#################
module(load="imuxsock") # provides support for local system logging
# module(load="imklog") # provides kernel logging support
module(load="immark") # provides --MARK-- message capability
# Legacy-syntax equivalents of the module() statements, kept commented out.
# $ModLoad imuxsock # provides support for local system logging
# $ModLoad imklog # provides kernel logging support (previously done by rklogd)
#$ModLoad immark # provides --MARK-- message capability
# provides UDP syslog reception
# (legacy form, disabled; UDP reception is set up in the Inputs section)
# $ModLoad imudp
# $UDPServerRun 514
# provides TCP syslog reception
# (legacy form, disabled; TCP reception is set up in the Inputs section)
# $ModLoad imtcp
# $InputTCPServerRun 514
# File output; used by the omfile actions in the Actions section.
module(load="builtin:omfile")
# NOTE(review): no action(type="mmnormalize") appears in this file, so this
# module looks unused here - confirm before removing.
module(load="mmnormalize") # parser using liblognorm
# Used by action(type="mmjsonparse") in the Actions section.
module(load="mmjsonparse") #for parsing CEE-enhanced syslog messages
# File input; used by the input(type="imfile") statement in the Inputs section.
module(load="imfile")
# MaxMind DB lookups. container="!src_geo" names the message variable that
# the rest of this file reads the lookup results from ($!src_geo in the
# "geoip" template and in the set statements of the Actions section).
module(load="mmdblookup" container="!src_geo")
###########################
#### GLOBAL DIRECTIVES ####
###########################
#
# Use traditional timestamp format.
# To enable high precision timestamps, comment out the following line.
#
$ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat
# Filter duplicated messages
# NOTE(review): presumably this also collapses identical consecutive JSON
# lines read from the input file - confirm that is acceptable.
$RepeatedMsgReduction on
#
# Set the default permissions for all log files.
#
$FileOwner root
$FileGroup root
$FileCreateMode 0640
$DirCreateMode 0755
$Umask 0022
# Run as the unprivileged "syslog" user and group.
# NOTE(review): the files written by the actions in this file (/tmp/json-output,
# /tmp/json-parse-failure, /var/log/omelasticsearch.log) presumably have to be
# writable by this user - verify.
$PrivDropToUser syslog
$PrivDropToGroup syslog
# General globals
global(net.enableDNS="off")
# Escaping of control characters on receive is switched off, i.e. received
# messages are left as they arrive.
global(parser.escapeControlCharactersOnReceive="off" )
#
# Where to place spool files
#
# The same directory is set twice: once with the legacy directive and once
# with the global() object. Both point to /var/spool/rsyslog.
$WorkDirectory /var/spool/rsyslog
global(workDirectory="/var/spool/rsyslog")
#
# Include all config files in /etc/rsyslog.d/
#
# Left disabled: drop-in files under /etc/rsyslog.d/ are not included.
# $IncludeConfig /etc/rsyslog.d/*.conf
#################
#### Inputs ####
#################
# provides UDP syslog reception
module(load="imudp")
input(type="imudp" port="514")
# provides TCP syslog reception
module(load="imtcp")
input(type="imtcp" port="514" )
# File 1
# Reads the JSON log file /opt/input-geo.json. Every message from this input
# is tagged "geoip"; the rule in the Actions section selects on that tag
# ($syslogtag contains 'geoip').
# NOTE(review): PersistStateInterval="1" presumably persists the read position
# after every line, and freshStartTail="off" presumably makes a first start
# read the file from the beginning - confirm against the imfile documentation.
input(type="imfile"
File="/opt/input-geo.json"
Tag="geoip"
PersistStateInterval="1"
freshStartTail="off")
#################
### Templates ###
#################
# "geoip" template: renders one message as a single-line JSON document.
# It is used by both the omfile and the omelasticsearch actions below.
#
# Fields:
#   @version, type            - fixed values ("1", "syslog")
#   @timestamp, received_at   - timegenerated in RFC 3339 format
#   host, syslog_hostname     - hostname property
#   syslog_timestamp          - timereported in RFC 3164 format
#   syslog_program            - programname property
#   syslog_message            - the raw message (the original JSON line)
#   received_from             - fromhost property
#   path                      - $!metadata!filename
#   name, src_ip              - values parsed from the JSON line by mmjsonparse
#   src_geoip                 - content of the $!src_geo object
#
# Every string value that originates from the message is emitted with
# format="json" so that quotes, backslashes and control characters are
# escaped and the output stays valid JSON.
template(name="geoip" type="list") {
    constant(value="{")
    constant(value="\"@version\":\"1")
    constant(value="\",\"@timestamp\":\"")       property(name="timegenerated" dateFormat="rfc3339")
    constant(value="\",\"host\":\"")             property(name="hostname" format="json")
    constant(value="\",\"type\":\"syslog")
    constant(value="\",\"syslog_timestamp\":\"") property(name="timereported" dateFormat="rfc3164" format="json")
    constant(value="\",\"syslog_hostname\":\"")  property(name="hostname" format="json")
    constant(value="\",\"syslog_program\":\"")   property(name="programname" format="json")
    constant(value="\",\"syslog_message\":\"")   property(name="msg" format="json")
    constant(value="\",\"received_at\":\"")      property(name="timegenerated" dateFormat="rfc3339")
    constant(value="\",\"received_from\":\"")    property(name="fromhost" format="json")
    constant(value="\",\"path\":\"")             property(name="$!metadata!filename" format="json")
    constant(value="\",\"name\":\"")             property(name="$!name" format="json")
    constant(value="\",\"src_ip\":\"")           property(name="$!src_ip" format="json")
    # $!src_geo is rendered as a JSON object. The constant already writes the
    # opening "{", so position.from="2" starts at the second character of the
    # rendered object (skipping its own "{"). The object's own closing "}" then
    # closes "src_geoip", and the final constant closes the whole document.
    constant(value="\",\"src_geoip\":{")         property(name="$!src_geo" position.from="2")
    constant(value="}\n")
}
#################
#### Actions ####
#################
###Fix text to utf8, disabled for now
###action(type="mmutf8fix")
#action(type="omfile" file="/tmp/logtesting" template="RSYSLOG_DebugFormat")

# Processing rule for the JSON log file.
# Only messages carrying the "geoip" tag (set by the imfile input) are handled:
#   1. parse the message as JSON,
#   2. on success, enrich it with City and ISP lookups for src_ip and write the
#      result to a debug file and to Elasticsearch,
#   3. on failure, write the message to a separate file.
if ($syslogtag contains 'geoip') then {
    # cookie="" : the file contains plain JSON lines, so no cookie prefix is
    # required in front of the JSON text.
    action(type="mmjsonparse" cookie="")

    if $parsesuccess == "OK" then {
        # https://github.com/rsyslog/rsyslog/issues/1650
        # Add MaxMind/GeoIP DB lookup information.
        # Each entry is ":<name in $!src_geo>:<path inside the database record>".
        action(type="mmdblookup" mmdbfile="/etc/rsyslog.d/GeoLite2-City.mmdb" key="!src_ip"
               fields=[":timezone:!location!time_zone",
                       ":latitude:!location!latitude",
                       ":longitude:!location!longitude",
                       ":dma_code:!location!metro_code",
                       # The city name lives under the "city" object of the
                       # record; the previous path "!names!en" does not exist
                       # at the record root and therefore always gave null.
                       ":city_name:!city!names!en",
                       ":continent_code:!continent!code",
                       ":country_code2:!country!iso_code",
                       # NOTE(review): same source path as country_code2, so
                       # this is the 2-letter code as well.
                       ":country_code3:!country!iso_code",
                       ":country_name:!country!names!en",
                       ":postal_code:!postal!code",
                       # NOTE(review): "subdivisions" is an array in the MaxMind
                       # record; these two paths presumably cannot address an
                       # array element and come back as null - verify.
                       ":region_code:!subdivisions!iso_code",
                       ":region_name:!subdivisions!names!en"
                      ]
        )

        # Add MaxMind/GeoIP ISP DB lookup information
        action(type="mmdblookup" mmdbfile="/etc/rsyslog.d/GeoIP2-ISP.mmdb" key="!src_ip"
               fields=[":asn:!autonomous_system_number",
                       ":as_org:!autonomous_system_organization",
                       "!isp",
                       "!organization"
                      ]
        )

        # Add IP to src_geo object
        set $!src_geo!ip = $!src_ip;

        # If geolocation was successful, add lat,lon in a special location object
        if $! contains 'latitude' then {
            set $!src_geo!location!lat = $!src_geo!latitude;
            set $!src_geo!location!lon = $!src_geo!longitude;
        }

        # Output to a new file
        action(type="omfile" File="/tmp/json-output" template="geoip")

        # Output to Elasticsearch
        action(type="omelasticsearch"
               server="192.168.1.10"
               serverport="9200"
               template="geoip"
               searchIndex="geoindex"
               dynSearchIndex="off"           # "geoindex" is a literal index name, not a template name
               searchType="syslog"
               bulkmode="on"                  # use the Bulk API
               queue.type="linkedlist"        # in-memory action queue; without a queue type the settings below are not used
               queue.dequeuebatchsize="5000"  # ES bulk size
               queue.size="100000"            # capacity of the action queue
               queue.workerthreads="5"        # 5 workers for the action
               action.resumeretrycount="-1"   # retry indefinitely if ES is unreachable
               errorfile="/var/log/omelasticsearch.log"
        )
    } else if $parsesuccess == "FAIL" then {
        # Keep lines that are not valid JSON for later inspection.
        action(type="omfile" File="/tmp/json-parse-failure")
    }
}
To run Rsyslog we chose the quickest way, which is to run it in Docker, so we put all the required Rsyslog configuration, logs and MaxMind databases in a directory:
linux@linux-VM:~$ ls -l
-rwxrwxr-x 1 linux linux 26331174 Aug 6 19:11 GeoIP2-ISP.mmdb
-rwxrwxr-x 1 linux linux 51469823 Aug 6 19:11 GeoLite2-City.mmdb
-rw-rw-r-- 1 linux linux 107 Aug 18 19:13 input-geo.json
-rwxrwxr-x 1 linux linux 2244 Aug 18 19:32 rsyslog.conf
Running Rsyslog on Docker is not that easy, as the Docker images are still a work in progress on GitHub (last checked 2018-08-18). So we created a custom image under our personal account, called manios/rsyslog. Let’s run a Docker container with Rsyslog 8.37.0:
docker run \
  -d \
  --name myrsyslog \
  -v "$(pwd)/GeoLite2-City.mmdb:/etc/rsyslog.d/GeoLite2-City.mmdb" \
  -v "$(pwd)/GeoIP2-ISP.mmdb:/etc/rsyslog.d/GeoIP2-ISP.mmdb" \
  -v "$(pwd)/input-geo.json:/opt/input-geo.json" \
  -v "$(pwd)/rsyslog.conf:/etc/rsyslog.conf" \
  manios/rsyslog:8.37.0
Now Rsyslog is running. Let’s run a Bash shell inside the container:
docker exec -it myrsyslog /bin/bash
While Rsyslog is running, if you examine the contents of the /tmp/json-output
file, you will notice that the messages contain a lot of geolocation information and resemble the following:
{
"@version": "1",
"@timestamp": "2018-08-18T06:59:14.640903+00:00",
"host": "b576d0a6022b",
"type": "syslog",
"syslog_timestamp": "Aug 18 06:59:14",
"syslog_hostname": "b576d0a6022b",
"syslog_program": "geoip",
"syslog_message": "{\"name\":\"Christos\",\"src_ip\":\"63.145.248.101\",\"age\":12}",
"received_at": "2018-08-18T06:59:14.640903+00:00",
"received_from": "",
"path": "/opt/input-geo.json",
"name": "Christos",
"src_ip": "63.145.248.101",
"src_geoip": {
"timezone": "America\/Los_Angeles",
"latitude": 37.925500,
"longitude": -122.343700,
"dma_code": 807,
"city_name": null,
"continent_code": "NA",
"country_code2": "US",
"country_code3": "US",
"country_name": "UnitedStates",
"postal_code": "94804",
"region_code": null,
"region_name": null,
"asn": 209,
"as_org": "QwestCommunicationsCompany,LLC",
"isp": "CenturyLink",
"organization": "CenturyLink",
"ip": "63.145.248.101",
"location": {
"lat": 37.9255,
"lon": -122.3437
}
}
}
Unfortunately, the current version of Rsyslog contains a known bug (#1650) which strips the space characters from strings. Notice that
"country_name": "UnitedStates"
should be
"country_name": "United States"
for example. Also, if you compare to our previous post (TODO add link here
), region_code
and region_name
should have a value. You can easily spot the differences between Rsyslog and Logstash output in the following image:
We hope this article helped you get up and running with Rsyslog and the use of MaxMind/GeoIP DB lookup (mmdblookup) module!
Comments