#!/usr/bin/perl -w
#$|=1;

# Convert a dotted-quad IPv4 address into its packed 4-byte form.
#
# Parameter: a string containing a dotted-quad address ("a.b.c.d").  The
#            pattern is not anchored, so the first dotted quad found in the
#            string is the one that is converted.
# Returns:   a 4-character string whose characters have the ordinal values
#            a, b, c and d, in that order.
# Dies:      if the string contains no dotted quad, or if any octet is
#            greater than 255 (such a value cannot be stored in one byte).
sub parse_address($)
{
  my ($textadr) = @_;
  $textadr = "" unless defined $textadr;

  # Capture the octets directly instead of reading $1..$4 afterwards: after
  # a failed match those variables still hold whatever an earlier successful
  # match left in them.
  my @octets = $textadr =~ /([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/
    or die "parse_address: no dotted-quad address in '$textadr'\n";

  for my $octet (@octets)
  {
    die "parse_address: octet $octet out of range in '$textadr'\n"
      if $octet > 255;
  }

  # "C4" packs four unsigned 8-bit values, one byte each.
  return pack("C4", @octets);
}

# sub number_address($)
# {
#   my $binadr = $_[0];
#   return
#     (ord(substr($binadr,0,1)) << 24) |
#     (ord(substr($binadr,1,1)) << 16) |
#     (ord(substr($binadr,2,1)) << 8) |
#     ord(substr($binadr,3,1));
# }

# ---------------------------------------------------------------------------
# Main program.
#
# Reads whois-style records from the files named on the command line (or
# from standard input) and writes two files in the current directory:
#
#   ipblocks.map   binary; one 9-byte record per accepted block: the packed
#                  start address (4 bytes), the packed end address (4 bytes)
#                  and the numeric country id (1 byte).
#   countries.dat  text; one "CC: id" line for every country code, written
#                  the first time that code is seen.
#
# Records are separated by empty lines.  Within a record the attributes
# netname, inetnum, descr and country are recognised, in their long form or
# in the short forms *na, *in, *de and *cy.  A block is emitted when the
# country attribute is reached, so an inetnum attribute that follows the
# country attribute of the same record is not used.
# ---------------------------------------------------------------------------
use strict;

my $map_file       = "ipblocks.map";
my $countries_file = "countries.dat";

open my $map_fh, '>', $map_file
  or die "Cannot open $map_file for writing: $!\n";
binmode $map_fh;
open my $countries_fh, '>', $countries_file
  or die "Cannot open $countries_file for writing: $!\n";

my %countries;            # country code => numeric id
my $country_alloc = 1;    # next numeric id to hand out

# State of the record currently being read.  Everything starts out empty so
# that the first record does not need a preceding blank line.
my $adr_start = "";
my $adr_end   = "";
my $descr     = "";
my $name      = "";

BLOCK: while (my $line = <>)
{
  if ($line =~ /^$/)
  {
    # An empty line ends the record: forget everything collected so far.
    # The netname is cleared as well, otherwise a record without a netname
    # would be filtered using the name of the previous record.
    $adr_start = "";
    $adr_end   = "";
    $descr     = "";
    $name      = "";
  }
  elsif ($line =~ /^(?:\*na|netname):\s+(.*)$/)
  {
    $name = $1;
  }
  elsif ($line =~ /^(?:\*in|inetnum):\s+([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) - ([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/)
  {
    $adr_start = $1;
    $adr_end   = $2;
  }
  elsif ($line =~ /^(?:\*de|descr):\s+(.*)$/)
  {
    # A record may carry several descr lines; keep them all, space-separated.
    $descr .= "$1 ";
  }
  elsif ($line =~ /^(?:\*cy|country):\s+([A-Za-z]{2})$/)
  {
    my $country = uc($1);

    # Some IP blocks in French oversea possessions are classified in FR
    # whereas for routing purposes it's better to classify them into their
    # own domains (ISO-3166). Some heuristics are used: any description
    # matching the name of the territory (but not as "<name> Street")
    # is very probably for a block inside that territory.
    if ($country eq "FR")
    {
      # First five-digit run in the description, taken to be a ZIP code;
      # only its first two digits matter here.
      my ($zip_prefix) = $descr =~ /([0-9]{2})[0-9]{3}/;

      if (defined $zip_prefix && $zip_prefix != 97 && $zip_prefix != 98)
      {
        # there's a ZIP code that's not for an oversea territory
      }
      elsif ($descr =~ /Reunion/i &&
             $descr !~ /(?:r|rue|place) de la Reunion/i &&
             $descr !~ /Reunion des/i)
      {
        $country = "RE";
        print "$descr: Reunion\n";
      }
      elsif ($descr =~ /Guadeloupe/i &&
             $descr !~ /(?:r|rue|place) de la Guadeloupe/i)
      {
        $country = "GP";
        print "$descr: Guadeloupe\n";
      }
      elsif ($descr =~ /Martinique/i &&
             $descr !~ /(?:r|rue|place) de la Martinique/i)
      {
        $country = "MQ";
        # Report the reclassification like every other territory does.
        print "$descr: Martinique\n";
      }
      elsif ($descr =~ /Guyane/i &&
             $descr !~ /(?:r|rue|place) de la Guyane/i)
      {
        $country = "GF";
        print "$descr: Guyane\n";
      }
      elsif (($descr =~ /Nouvelle Caledonie/i &&
              $descr !~ /(?:r|rue|place) de la Nouvelle Caledonie/i) ||
             ($descr =~ /Noumea/i &&
              $descr !~ /(?:r|rue|place) de Noumea/i))
      {
        $country = "NC";
        print "$descr: Nouvelle Caledonie\n";
      }
      elsif ($descr =~ /Polynesie/i &&
             $descr !~ /Delegation/i &&
             $descr !~ /(?:r|rue|place) de la Polynesie/i)
      {
        $country = "PF";
        print "$descr: Polynesie Francaise\n";
      }
    }

    # Look up the numeric id of the country, allocating one on first sight.
    # This happens before the filters below, so a country gets an id even
    # when the block that introduced it is skipped.
    my $country_code = $countries{$country};
    if (!defined $country_code)
    {
      $country_code = $country_alloc++;
      # The id is stored in a single byte of each map record.
      die "Too many distinct country codes: id $country_code for $country does not fit in one byte\n"
        if $country_code > 255;
      $countries{$country} = $country_code;
      print {$countries_fh} "$country: $country_code\n"
        or die "Cannot write to $countries_file: $!\n";
    }

    # APNIC uses these to signal enormous "early registration blocks" or
    # blocks managed by other entities, which it assigns to Australia.
    # NOTE(review): in the last pattern the ^ anchor binds only to "IANA-";
    # "APNIC-", "RIPE-" and "ARIN-" match anywhere in the name.  Left as it
    # was because the intended behaviour is not evident - please confirm.
    next BLOCK if !$adr_start or !$adr_end
         or ($country eq "AU" && $name =~ /BLOCK/)
         or ($name =~ /^IANA-|APNIC-|RIPE-|ARIN-/);

    # parse_address may die or return something that is not four bytes
    # (e.g. for an octet above 255); such a block must not reach the map,
    # because a malformed record would misalign every record after it.
    my $bin_adr_start = eval { parse_address($adr_start) };
    my $bin_adr_end   = eval { parse_address($adr_end) };
    if (grep { !defined $_ || $_ !~ /\A[\x00-\xff]{4}\z/ }
             $bin_adr_start, $bin_adr_end)
    {
      warn "Wrong block: $adr_start - $adr_end\n";
      $adr_start = "";
      $adr_end   = "";
      next BLOCK;
    }

#     $netmask = number_address($bin_adr_start) ^ number_address($bin_adr_end);
#     for($blockwidth=0; $blockwidth<32; $blockwidth++)
#     {
#       last if (($netmask & 1) == 0);
#       $netmask >>= 1;
#     }
#     if ($netmask != 0)
#     {
#       print "Wrong block: $adr_start - $adr_end\n";
#       next BLOCK;
#     }
#     $blockwidth = 32-$blockwidth;
    # print "$adr_start/$blockwidth $country_code\n";
    print {$map_fh} $bin_adr_start, $bin_adr_end, chr($country_code)
      or die "Cannot write to $map_file: $!\n";

    # The range has been emitted; clear it so that a second country line in
    # the same record does not emit it again.
    $adr_start = "";
    $adr_end   = "";
  }
}

# Buffered write errors only surface when the handle is closed.
close $map_fh       or die "Cannot close $map_file: $!\n";
close $countries_fh or die "Cannot close $countries_file: $!\n";
