Rsyslog - Parse Json and enrich IP with Geolocation using Maxmind GeoLite2 City and ISP

6 minute read

Today we wanted to parse some JSON logs stored in a file using Rsyslog and enrich them with geolocation information about the city and the ISP an IP address belongs to. We initially tried Logstash (see the relevant previous blog post), but it was too slow, so we decided to try parsing with Rsyslog instead. The file, /opt/input-geo.json, had the following structure and content (the same as in the Logstash post):

{"name":"Christos","src_ip":"63.145.248.101","age":12}
{"name":"Nikos","src_ip":"98.158.156.175","age":10}

Rsyslog has a MaxMind/GeoIP DB lookup module (mmdblookup) which adds information about the geographical location of IP addresses, based on data from the MaxMind databases. In our case we used the GeoLite2 City and GeoIP2 ISP databases (GeoLite2-City.mmdb and GeoIP2-ISP.mmdb).

We wanted to parse the JSON file, enrich the src_ip field, and store the geolocation information in src_geoip. We then forward the message to Elasticsearch using omelasticsearch (the Elasticsearch output module). For debugging purposes we also enabled file output. Thus, the configuration (rsyslog.conf) looked like the following:

#  /etc/rsyslog.conf	Configuration file for rsyslog.
#
#			For more information see
#			/usr/share/doc/rsyslog-doc/html/rsyslog_conf.html
#
#  Default logging rules can be found in /etc/rsyslog.d/50-default.conf


#################
#### MODULES ####
#################

# --- Input modules ---
# Local system logging via the Unix log socket.
module(load="imuxsock")
# Periodic --MARK-- messages.
module(load="immark")
# Text file input; used for the JSON file we want to enrich.
module(load="imfile")

# Kernel logging is intentionally left disabled:
# module(load="imklog")
#
# Legacy-syntax equivalents, kept for reference only:
# $ModLoad imuxsock
# $ModLoad imklog
# $ModLoad immark
# $ModLoad imudp
# $UDPServerRun 514
# $ModLoad imtcp
# $InputTCPServerRun 514

# --- Message modification modules ---
# liblognorm-based parser.
module(load="mmnormalize")
# JSON / CEE-enhanced syslog message parser.
module(load="mmjsonparse")
# MaxMind DB lookups; results are stored under the !src_geo container.
module(load="mmdblookup" container="!src_geo")

# --- Output modules ---
module(load="builtin:omfile")

###########################
#### GLOBAL DIRECTIVES ####
###########################

#
# Use traditional timestamp format.
# To enable high precision timestamps, comment out the following line.
#
$ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat

# Filter duplicated messages
$RepeatedMsgReduction on

#
# Set the default permissions for all log files.
#
$FileOwner root
$FileGroup root
$FileCreateMode 0640
$DirCreateMode 0755
$Umask 0022

# Drop root privileges after startup and run as the syslog user/group.
$PrivDropToUser syslog
$PrivDropToGroup syslog

#
# General globals, set once in a single global() object:
#   net.enableDNS                            - "off": no DNS lookups
#   parser.escapeControlCharactersOnReceive  - "off": control characters in
#                                              received messages are NOT escaped
#                                              (they are passed through as-is)
#   workDirectory                            - where to place spool files
#                                              (the legacy $WorkDirectory directive set the
#                                              same path and was redundant, so it is dropped)
#
global(
    net.enableDNS="off"
    parser.escapeControlCharactersOnReceive="off"
    workDirectory="/var/spool/rsyslog"
)
#
# Include all config files in /etc/rsyslog.d/
#
# $IncludeConfig /etc/rsyslog.d/*.conf



#################
#### Inputs  ####
#################


# Network listeners: syslog reception over UDP and TCP, both on port 514.
module(load="imudp")
module(load="imtcp")
input(type="imudp" port="514")
input(type="imtcp" port="514")

# File 1: the JSON file to enrich.
#   Tag                  - "geoip"; the processing rules select messages by this tag
#   PersistStateInterval - "1": persist the read position after every line
#   freshStartTail       - "off": do not skip content already present in the file
input(
    type="imfile"
    File="/opt/input-geo.json"
    Tag="geoip"
    PersistStateInterval="1"
    freshStartTail="off"
)

#################
### Templates ###
#################

# Template "geoip": renders each record as a single-line JSON document.
# Every constant() closes the previous string value and opens the next key;
# the property() that follows supplies the value. Fields, in order:
#   @version, @timestamp, host, type, syslog_timestamp, syslog_hostname,
#   syslog_program, syslog_message, received_at, received_from, path,
#   name, src_ip, src_geoip
# src_geoip is emitted as a nested object: the template writes the opening "{"
# itself and position.from="2" skips the first character of the rendered
# $!src_geo value; the final constant closes the outer document.
template(name="geoip" type="list") {
    constant(value="{")
    constant(value="\"@version\":\"1")
    constant(value="\",\"@timestamp\":\"")
    property(name="timegenerated" dateFormat="rfc3339")
    constant(value="\",\"host\":\"")
    property(name="hostname")
    constant(value="\",\"type\":\"syslog")
    constant(value="\",\"syslog_timestamp\":\"")
    property(name="timereported" dateFormat="rfc3164" format="json")
    constant(value="\",\"syslog_hostname\":\"")
    property(name="hostname" format="json")
    constant(value="\",\"syslog_program\":\"")
    property(name="programname" format="json")
    constant(value="\",\"syslog_message\":\"")
    property(name="msg" format="json")
    constant(value="\",\"received_at\":\"")
    property(name="timegenerated" dateFormat="rfc3339")
    constant(value="\",\"received_from\":\"")
    property(name="fromhost" format="json")
    constant(value="\",\"path\":\"")
    property(name="$!metadata!filename")
    constant(value="\",\"name\":\"")
    property(name="$!name" format="json")
    constant(value="\",\"src_ip\":\"")
    property(name="$!src_ip" format="json")
    constant(value="\",\"src_geoip\":{")
    property(name="$!src_geo" position.from="2")
    constant(value="}\n")
}


#################
#### Actions ####
#################


###Fix text to utf8, disabled for now
###action(type="mmutf8fix")

#action(type="omfile" file="/tmp/logtesting" template="RSYSLOG_DebugFormat")


# Processing pipeline for lines read from the "geoip"-tagged file input:
#   1. parse the line as JSON
#   2. enrich !src_ip with City and ISP data from the MaxMind databases
#   3. write the result to a debug file and to Elasticsearch
if ($syslogtag contains 'geoip') then {

    # Parse the message as JSON. cookie="" accepts plain JSON lines that do
    # not start with the "@cee:" cookie.
    action(type="mmjsonparse" cookie="")

    if $parsesuccess == "OK" then {

        # https://github.com/rsyslog/rsyslog/issues/1650

        # Add MaxMind/GeoIP DB lookup information.
        # Each entry is ":<output name>:<path inside the MaxMind record>".
        # The city name lives under city -> names -> en in the GeoLite2 City
        # schema (the previous mapping "!names!en" matched nothing and
        # produced "city_name": null).
        # NOTE(review): "subdivisions" is an array in the MaxMind schema, which
        # is presumably why region_code / region_name come back null - confirm
        # whether mmdblookup can address array elements.
        action( type="mmdblookup" mmdbfile="/etc/rsyslog.d/GeoLite2-City.mmdb" key="!src_ip"
                fields=[":timezone:!location!time_zone",
                        ":latitude:!location!latitude",
                        ":longitude:!location!longitude",
                        ":dma_code:!location!metro_code",
                        ":city_name:!city!names!en",
                        ":continent_code:!continent!code",
                        ":country_code2:!country!iso_code",
                        ":country_code3:!country!iso_code",
                        ":country_name:!country!names!en",
                        ":postal_code:!postal!code",
                        ":region_code:!subdivisions!iso_code",
                        ":region_name:!subdivisions!names!en"
                        ]
        )

        # Add MaxMind/GeoIP ISP DB lookup information
        action( type="mmdblookup" mmdbfile="/etc/rsyslog.d/GeoIP2-ISP.mmdb" key="!src_ip"
                fields=[":asn:!autonomous_system_number",
                        ":as_org:!autonomous_system_organization",
                        "!isp",
                        "!organization"
                        ]
        )

        # Add IP to src_geo object
        set $!src_geo!ip = $!src_ip;

        # If geolocation was successful, add lat,lon in a special location object
        if $! contains 'latitude' then {
            set $!src_geo!location!lat = $!src_geo!latitude;
            set $!src_geo!location!lon = $!src_geo!longitude;
        }

        # Output to a new file (debugging aid)
        action(type="omfile" File="/tmp/json-output" template="geoip")

        # Output to Elasticsearch
        action(type="omelasticsearch"
            server="192.168.1.10"
            serverport="9200"
            template="geoip"
            searchIndex="geoindex"          # literal index name
            dynSearchIndex="off"            # "on" would treat searchIndex as a template name; no such template exists
            searchType="syslog"
            bulkmode="on"                   # use the Bulk API
            queue.type="linkedlist"         # in-memory action queue; without a queue type the queue.* settings below are ignored
            queue.dequeuebatchsize="5000"   # ES bulk size
            queue.size="100000"   # capacity of the action queue
            queue.workerthreads="5"   # 5 workers for the action
            action.resumeretrycount="-1"  # retry indefinitely if ES is unreachable
            errorfile="/var/log/omelasticsearch.log"
        )

    } else if $parsesuccess == "FAIL" then {
        # Keep lines that could not be parsed as JSON for later inspection.
        action(type="omfile" File="/tmp/json-parse-failure")
    }
}

To run Rsyslog we chose the quickest way, running it in Docker, so we put the required Rsyslog configuration, the input log and the MaxMind databases in one directory:

linux@linux-VM:~$ ls -l
-rwxrwxr-x 1 linux linux 26331174 Aug   6 19:11 GeoIP2-ISP.mmdb
-rwxrwxr-x 1 linux linux 51469823 Aug   6 19:11 GeoLite2-City.mmdb
-rw-rw-r-- 1 linux linux      107 Aug  18 19:13 input-geo.json
-rwxrwxr-x 1 linux linux     2244 Aug  18 19:32 rsyslog.conf

Running Rsyslog on Docker is not that easy, as the Docker images are a work in progress on GitHub (last checked 2018-08-18). So we created a custom image in our personal account, called manios/rsyslog. Let’s run a Docker container with Rsyslog 8.37.0:

# Start Rsyslog detached, bind-mounting the MaxMind databases, the input file
# and the configuration into the container. The host paths are quoted so the
# mounts still work when the current directory contains spaces.
docker run \
    -d \
    --name myrsyslog \
    -v "$(pwd)/GeoLite2-City.mmdb:/etc/rsyslog.d/GeoLite2-City.mmdb" \
    -v "$(pwd)/GeoIP2-ISP.mmdb:/etc/rsyslog.d/GeoIP2-ISP.mmdb" \
    -v "$(pwd)/input-geo.json:/opt/input-geo.json" \
    -v "$(pwd)/rsyslog.conf:/etc/rsyslog.conf" \
    manios/rsyslog:8.37.0

Now Rsyslog is running. Let’s run a Bash shell inside the container:

# Open an interactive Bash shell inside the running container.
docker exec --interactive --tty myrsyslog /bin/bash

While Rsyslog is running, if you examine the contents of the /tmp/json-output file, you will notice that the messages contain a lot of geolocation information and resemble the following:

{
    "@version": "1",
    "@timestamp": "2018-08-18T06:59:14.640903+00:00",
    "host": "b576d0a6022b",
    "type": "syslog",
    "syslog_timestamp": "Aug 18 06:59:14",
    "syslog_hostname": "b576d0a6022b",
    "syslog_program": "geoip",
    "syslog_message": "{\"name\":\"Christos\",\"src_ip\":\"63.145.248.101\",\"age\":12}",
    "received_at": "2018-08-18T06:59:14.640903+00:00",
    "received_from": "",
    "path": "/opt/input-geo.json",
    "name": "Christos",
    "src_ip": "63.145.248.101",
    "src_geoip": {
        "timezone": "America\/Los_Angeles",
        "latitude": 37.925500,
        "longitude": -122.343700,
        "dma_code": 807,
        "city_name": null,
        "continent_code": "NA",
        "country_code2": "US",
        "country_code3": "US",
        "country_name": "UnitedStates",
        "postal_code": "94804",
        "region_code": null,
        "region_name": null,
        "asn": 209,
        "as_org": "QwestCommunicationsCompany,LLC",
        "isp": "CenturyLink",
        "organization": "CenturyLink",
        "ip": "63.145.248.101",
        "location": {
            "lat": 37.9255,
            "lon": -122.3437
        }
    }
}

Unfortunately, the current version of Rsyslog contains a known bug (#1650) which strips space characters from strings. Notice that

"country_name": "UnitedStates"

should be

"country_name": "United States"

for example. Also, if you compare to our previous post (TODO add link here), region_code and region_name should have a value. You can easily spot the differences between Rsyslog and Logstash output in the following image:

Rsyslog and Logstash output diff when using Maxmind GeoIP City and ISP lookup databases

We hope this article helped you get up and running with Rsyslog and the use of MaxMind/GeoIP DB lookup (mmdblookup) module!

Comments