#!/usr/local/bin/perl -w

use strict;

# Warning filter: under -w, drop every "Use of uninitialized value" warning
# and pass all other warnings through unchanged.
$SIG{__WARN__} = sub {
  my ($warning) = @_;
  return if $warning =~ /Use of uninitialized value/;
  warn @_;
};

# Running total of messages parsed successfully (package global $main::count).
$::count = 0;

#---------------------------------------------------------------
#   Settings

# Definition of the Kuzuha.pm log file format: field name => zero-based
# column index.  The fields are listed in column order and each index is
# assigned from the field's position in the list.
my %savelogformat = do {
  my $column = 0;
  map { ($_, $column++) } (
    'msgid',      #  0: message ID (serial number)
    'bbsid',      #  1: topic ID for the Remix-style multi mode
    'threadid',   #  2: thread ID
    'refid',      #  3: reference ID
    'msgtime',    #  4: registration date and time
    'pcode',      #  5: protect code
    'hostname',   #  6: host name
    'useragent',  #  7: user agent
    'name',       #  8: name
    'userattr',   #  9: user attribute (by default the mail address)
    'title',      # 10: title
    'msg',        # 11: body text
    'msgattr',    # 12: message attribute (currently unused)
  );
};

# Match patterns for the input file (@futurelog HTML)
my $separator = '<HR>';   # the input HTML is split into per-message chunks on this tag

# parse($html)
#   Extracts the fields of one message from a single chunk of the input HTML.
#
#   Returns a reference to a hash that always holds {msg} and, when the
#   corresponding markup is present, {msgid}, {threadid}, {title}, {name},
#   {msgtime} and {refid}.  Returns undef when the chunk has no
#   <BLOCKQUOTE><PRE>...</PRE> body.
#
#   Side effect: increments $::count for every chunk parsed successfully.
#
#   NOTE(review): the non-ASCII label text inside the name, date and
#   reference patterns looks mis-decoded in this copy of the file
#   (presumably Shift_JIS originally) -- confirm against the original bytes.
sub parse {
  my ($html) = @_;
  my %message;

  # The message body is mandatory; a chunk without it is not a message.
  if ($html =~ m|<BLOCKQUOTE>[\r\n\s]*<PRE>(.+?)</PRE>|s) {
    $message{msg} = $1;
  } else {
    # Explicit undef (not a bare return) so list-context callers still
    # receive exactly one element, as before.
    return undef;
  }

  if ($html =~ m|<A name="(\d+)"></A>|i) {
    $message{msgid} = $1;
  }
  if ($html =~ m|<A href="\?m=t\&amp;s=(\d+)\&amp;|i) {
    $message{threadid} = $1;
  }
  # The "+" of size="+1" must be escaped: unescaped it is a quantifier on
  # the preceding quote and the pattern can never match the literal markup.
  if ($html =~ m|<FONT size="\+1" color="#fffffe"><B>([^<]+)</B></FONT>|i) {
    $message{title} = $1;
  }
  if ($html =~ m|eҁF<B>(.+?)</B>|i) {
    $message{name} = $1;
    $message{name} =~ s/<[^>]*>//g;   # drop any markup nested inside the name
  }
  if ($html =~ m|<FONT size="-1">eF(\d\d\d\d)/(\d\d)/(\d\d)\(.+?\)(\d\d)(\d\d)(\d\d)b|i) {
    $message{msgtime} = sprintf('%04d-%02d-%02d %02d:%02d:%02d', $1, $2, $3, $4, $5, $6);
  }

  # Strip the reference link(s) out of the body and remember the ID captured
  # by the last one removed.
  if ($message{msg} =~ s|<A href="#(\d+)">QlF.+</A>||ig) {
    $message{refid} = $1;
  }
  # A message with neither a reference nor a thread link is treated as the
  # head of its own thread.
  if (!$message{refid} and !$message{threadid}) {
    $message{threadid} = $message{msgid};
  }

  # Normalise every line break in the body to a bare CR, remove the
  # target="link" attribute, then trim a leading CR and trailing CRs.
  $message{msg} =~ s/\r\n/\r/g;
  $message{msg} =~ tr/\n/\r/;
  $message{msg} =~ s/ target=\"link\"//g;
  $message{msg} =~ s/^\r//m;
  $message{msg} =~ s/\r+$//m;

  $::count++;
  return \%message;
}

#---------------------------------------------------------------
#   Main processing

{
  # Entry point: convert the HTML file named by the first command-line
  # argument into a .dat file of comma-separated records, one per message,
  # with columns ordered according to %savelogformat.
  @ARGV or die("ERROR: please specify input filename. $!");
  -e $ARGV[0] or die("ERROR: $ARGV[0] not found. $!");
  my $inputfile = $ARGV[0];

  # Slurp the whole input.  $/ is only undefined inside this do-block, and
  # the three-argument open keeps characters in the file name from being
  # interpreted as an open mode.
  my $file = do {
    local $/;
    open(my $in, '<', $inputfile) or die("ERROR: $ARGV[0] open error. $!");
    my $content = <$in>;
    close $in;
    $content;
  };

  my @data = split(/\Q$separator\E/, $file);
  undef $file;

  # Derive the output name by swapping the extension for .dat.  If that
  # leaves the name unchanged (no extension, or already .dat), append .dat
  # instead so the input file is never overwritten.
  my $datfile = $inputfile;
  $datfile =~ s/\.\w+$/\.dat/;
  $datfile .= '.dat' if $datfile eq $inputfile;

  open(my $out, '>', $datfile) or die("ERROR: $datfile write error. $!");

  print "now parsing $inputfile...\n";
  # The first and last chunks are skipped; only the chunks between them are
  # treated as messages.
  for my $data (@data[1..$#data-1]) {
    my $message = parse($data);
    # parse() returns undef for a chunk without a message body; skip it
    # rather than writing an all-empty record that $::count does not count.
    next unless $message;

    # Place every field at the column index given by %savelogformat.
    my @insertdata;
    for my $logkey (keys %savelogformat) {
      $insertdata[$savelogformat{$logkey}] = $message->{$logkey};
    }

    # CSV-style quoting: double any embedded quotes, and wrap the field in
    # quotes when it contained a quote, a comma or a line break.
    my @fields = map {
      my $field = defined $_ ? $_ : '';
      my $needs_quotes = ($field =~ s/"/""/g) || $field =~ /[\r\n,]/;
      $needs_quotes ? qq("$field") : $field;
    } @insertdata;

    # Line breaks inside a record become bare CRs, so LF only ever
    # terminates a record.
    my $insertdata = join ',', @fields;
    $insertdata =~ s/\r\n/\r/g;
    $insertdata =~ tr/\n/\r/;
    $insertdata .= "\n";

    print {$out} $insertdata or die("ERROR: $datfile write error. $!");
  }

  # Buffered write errors only surface at close, so check it.
  close($out) or die("ERROR: $datfile write error. $!");
  print "SUCCESS! $datfile created with $::count records.";
}




1;

#---------------------------------------------------------------
#   About

=head1 NAME

logconv.pl - log file converter for Kuzuha.pm

usage)
> perl logconv.pl .\tmp\20030620.html

now parsing .\tmp\20030620.html...
SUCCESS! .\tmp\20030620.dat created with 7120 records.

=cut
