#!/usr/bin/perl -w
#$|=1;

# parse_address(TEXT)
#
# Converts the first dotted-quad IPv4 address found in TEXT (e.g. "10.1.2.3")
# into a 4-byte string, one byte per octet, most significant octet first.
#
# Returns undef (the empty list in list context) when TEXT contains no
# dotted quad or when any octet is greater than 255.
sub parse_address($)
{
  my $textadr = $_[0];

  # The match result must be tested: after a failed match $1..$4 still hold
  # the captures of some earlier, unrelated match, and a bogus address would
  # be returned silently.
  return unless $textadr =~ /([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/;
  my @octets = ($1, $2, $3, $4);

  # chr() of a value above 255 yields a wide character, which cannot be
  # written as a single byte; reject such input instead.
  return if grep { $_ > 255 } @octets;

  return pack("C4", @octets);
}

# get_country_code(COUNTRY)
#
# Returns the numeric code associated with the country string COUNTRY.
# "UK" is treated as an alias for "GB".  A country seen for the first time
# is given the next value of the global counter $country_alloc, recorded in
# the global hash %countries, and logged as "<country>: <code>" on the
# COUNTRIES filehandle.
sub get_country_code {
    my ($iso) = @_;

    # Fold the "UK" spelling into "GB" before the lookup.
    $iso = "GB" if $iso eq "UK";

    # Already known: hand back the code allocated earlier.
    return $countries{$iso} if exists $countries{$iso};

    # First occurrence: allocate, remember and log a fresh code.
    my $fresh_code = $country_alloc++;
    $countries{$iso} = $fresh_code;
    print COUNTRIES "$iso: $fresh_code\n";
    return $fresh_code;
}

# sub number_address($)
# {
#   my $binadr = $_[0];
#   return
#     (ord(substr($binadr,0,1)) << 24) |
#     (ord(substr($binadr,1,1)) << 16) |
#     (ord(substr($binadr,2,1)) << 8) |
#     ord(substr($binadr,3,1));
# }

# ---------------------------------------------------------------------------
# Main program.
#
# Reads the files named on the command line (or STDIN) line by line.  Two
# input formats are recognised per line:
#   * "stats" lines:  authority|CC|ipv4|a.b.c.d|size|yyyymmdd|status
#   * whois records:  "key: value" lines, records separated by blank lines
#
# For every accepted address range a 9-byte record is appended to
# ipblocks.map: 4 bytes first address, 4 bytes last address (most
# significant octet first), 1 byte country code as allocated by
# get_country_code().  The code allocations themselves are logged to
# countries.dat by get_country_code().
# ---------------------------------------------------------------------------
open(MAP, '>', 'ipblocks.map')
    or die "Cannot open ipblocks.map for writing: $!\n";
binmode MAP;
open(COUNTRIES, '>', 'countries.dat')
    or die "Cannot open countries.dat for writing: $!\n";

# Next code to hand out.  Deliberately a package global (no "my"):
# get_country_code() reads and increments it.
$country_alloc = 1;

# State of the whois record currently being read.  Initialised so that a
# "country:" line seen before any other field does not operate on undef.
my $adr_start = "";
my $adr_end   = "";
my $descr     = "";
my $name      = "";

BLOCK: while (<>)
{
    if (/\|.*\|.*\|/) { # stats format
        # arin|US|ipv4|3.0.0.0|16777216|19880223|assigned
        if (/^([^|]+)\| # authority
              ([^|]+)\| # country
              ipv4\| # class
              ([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\| # IP
              ([0-9]+)\| # size
              ([0-9]{8})\|
              (?:allocated|assigned)/sx) {

            # Copy the captures right away, before any other match runs.
            my $country = $2;
            my @octets  = ($3, $4, $5, $6);
            my $size    = $7;

            next BLOCK if $country eq "AP"; # APNIC

            # The pattern admits octets up to 999 and a size of 0; neither
            # can be expressed as a valid range record.
            if ($size < 1 or grep { $_ > 255 } @octets) {
                warn "Skipping malformed stats line $.: $_";
                next BLOCK;
            }

            my $country_code = get_country_code($country);

            # Work on the address as one 32-bit number so that carries
            # propagate between octets.  Adding the size octet by octet
            # loses them: 10.0.255.0 with size 512 must end at 10.1.0.255,
            # not 10.0.0.255.  Plain arithmetic (rather than shifts) keeps
            # this correct on perls with 32-bit integers as well.
            my $first = (($octets[0] * 256 + $octets[1]) * 256
                         + $octets[2]) * 256 + $octets[3];
            my $last = $first + $size - 1;
            # A block cannot extend past the end of the IPv4 space.
            $last = 0xFFFFFFFF if $last > 0xFFFFFFFF;

            print MAP pack("N2", $first, $last), chr($country_code);
        }
    } else { # whois format
        if (/^$/)
        {
            # Blank line: record separator, forget the previous record.
            # $name is reset too so that a netname cannot leak into the
            # filters applied to the following record.
            $adr_start = "";
            $adr_end   = "";
            $descr     = "";
            $name      = "";
        }
        elsif (/^(?:\*na|netname):\s+(.*)$/)
        {
            $name = $1;
        }
        elsif (/^(?:\*in|inetnum):\s+([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) - ([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/)
        {
            $adr_start = $1;
            $adr_end   = $2;
        }
        elsif (/^(?:\*de|descr):\s+(.*)$/)
        {
            # A record may carry several description lines; keep them all.
            $descr .= "$1 ";
        }
        elsif (/^(?:\*cy|country):\s+([A-Za-z]{2})$/)
        {
            # The country line triggers the output of the record, so the
            # fields collected so far are the ones that get used.
            my $country = uc($1);

            # Some IP blocks in French oversea possessions are classified in FR
            # whereas for routing purposes it's better to classify them into their
            # own domains (ISO-3166). Some heuristics are used: any description
            # matching the name of the territory (but not as "<name> Street")
            # is very probably for a block inside that territory.
            if ($country eq "FR") {
                if ($descr =~ /([0-9]{2})[0-9]{3}/ && $1!=97 && $1!=98) {
                    # there's a ZIP code that's not for an oversea territory
                } elsif ($descr =~ /Reunion/i &&
                         $descr !~ /(?:r|rue|place) de la Reunion/i &&
                         $descr !~ /Reunion des/i) {
                    $country = "RE";
                    print "$descr: Reunion\n";
                } elsif ($descr =~ /Guadeloupe/i &&
                         $descr !~ /(?:r|rue|place) de la Guadeloupe/i) {
                    $country = "GP";
                    print "$descr: Guadeloupe\n";
                } elsif ($descr =~ /Martinique/i &&
                         $descr !~ /(?:r|rue|place) de la Martinique/i) {
                    $country = "MQ";
                } elsif ($descr =~ /Guyane/i &&
                         $descr !~ /(?:r|rue|place) de la Guyane/i) {
                    $country = "GF";
                    print "$descr: Guyane\n";
                } elsif ($descr =~ /Nouvelle Caledonie/i &&
                         $descr !~ /(?:r|rue|place) de la Nouvelle Caledonie/i ||
                         $descr =~ /Noumea/i &&
                         $descr !~ /(?:r|rue|place) de Noumea/i) {
                    $country = "NC";
                    print "$descr: Nouvelle Caledonie\n";
                } elsif ($descr =~ /Polynesie/i &&
                         $descr !~ /Delegation/i &&
                         $descr !~ /(?:r|rue|place) de la Polynesie/i) {
                    $country = "PF";
                    print "$descr: Polynesie Francaise\n";
                }
            }

            # Allocated before the filters below, so a country gets a code
            # (and a countries.dat entry) even if this record is skipped.
            my $country_code = get_country_code($country);

            # APNIC uses these to signal
            # enormous "early registration blocks" or blocks managed
            # by other entities , which it assigns to Australia
            # NOTE(review): in the last pattern "^" anchors only the IANA-
            # alternative; APNIC-, RIPE- and ARIN- match anywhere in the
            # name.  Left unchanged -- confirm whether that is intended.
            next BLOCK if !$adr_start or !$adr_end
                or ($country eq "AU" && $name =~ /BLOCK/)
                or ($name =~ /^IANA-|APNIC-|RIPE-|ARIN-/);

            # The inetnum pattern accepts any run of digits per octet; a
            # value above 255 cannot be stored in one byte of the record.
            if (grep { $_ > 255 } split /\./, "$adr_start.$adr_end") {
                warn "Skipping out-of-range block $adr_start - $adr_end\n";
                next BLOCK;
            }

            my $bin_adr_start = parse_address($adr_start);
            my $bin_adr_end   = parse_address($adr_end);

#     $netmask = number_address($bin_adr_start) ^ number_address($bin_adr_end);
#     for($blockwidth=0; $blockwidth<32; $blockwidth++)
#     {
#       last if (($netmask & 1) == 0);
#       $netmask >>= 1;
#     }
#     if ($netmask != 0)
#     {
#       print "Wrong block: $adr_start - $adr_end\n";
#       next BLOCK;
#     }
#     $blockwidth = 32-$blockwidth;
            # print "$adr_start/$blockwidth $country_code\n";
            print MAP $bin_adr_start, $bin_adr_end, chr($country_code);

            # Mark the range as consumed so that a second country line in
            # the same record does not emit it again.
            undef $adr_start;
            undef $adr_end;
        }
    }
}

# Buffered write errors only surface at close time, so check it.
close(MAP)       or die "Error closing ipblocks.map: $!\n";
close(COUNTRIES) or die "Error closing countries.dat: $!\n";
