#!/usr/local/bin/ruby
# $Id: mkjindex 44 2006-04-07 08:59:42Z yuuji $
# Last modified Tue Mar 21 21:11:48 2006 on firestorm
#
# Convert \indexentry{漢字} into \indexentry{かんじ@漢字} using
# kakasi/chasen, then hand the result to makeindex to produce the *.ind file.
#
# Usage: mkjindex [-c] [-m CMD] [-mendex] [-mc CODE] indexfile
#   -c        use chasen instead of kakasi to obtain the kana reading
#   -m CMD    index processor command to run
#   -mendex   run 'mendex -f' and feed it EUC
#   -mc CODE  encoding fed to the index processor (first letter => nkf flag)
#   -s -e -j  accepted, but currently have no effect
#
# This script relies on $KCODE and the 'jcode' library.

code4ruby = 'euc'
$KCODE = code4ruby
#makeindexcode = "sjis"
makeindexcode = "euc"
latexcode = "jis"
# NOTE(review): only the first letter of the encoding name is passed ("-oe"),
# whereas the archived jperl version below passes the full name ("-oeuc").
# Confirm that kakasi accepts the abbreviated form.
kakasi = "kakasi -JH -KH -o" + code4ruby[0, 1]
chasen = 'chasen -F %y'
#makeindex = 'makeindex'
makeindex = 'makeindex2'

require 'jcode'

kanafilter = kakasi
while /^-/ =~ ARGV[0]
  case ARGV[0]
  when /^-[sej]$/
    # Accepted so existing command lines keep working; the value was never
    # used anywhere in this script.
  when "-c"
    kanafilter = chasen
  when "-m"
    ARGV.shift
    makeindex = ARGV[0]
  when "-mendex"
    makeindex = 'mendex -f'
    makeindexcode = 'e'
  when '-mc'
    ARGV.shift
    makeindexcode = ARGV[0]
  end
  ARGV.shift
end

# Return a copy of +str+ with katakana (ァ-ン) replaced by the corresponding
# hiragana (ぁ-ん).
def hira(str)
  str.tr("ァ-ン", "ぁ-ん")
  ##str.tr("ァ", "ぁ")
end

# Return a copy of +str+ with voiced and semi-voiced hiragana replaced by
# their unvoiced counterparts (e.g. が -> か, ぱ -> は).
def purify(str)
  str.tr("がぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽ",
         "かきくけこさしすせそたちつてとはひふへほはひふへほ")
end

# Map a leading character to the heading under which it is grouped:
# anything outside A-Z, a-z, hiragana and digits becomes '記号', a digit
# becomes '数字', and a hiragana becomes the first kana of its row
# (e.g. き -> か).  Each substitution replaces only the first match; the
# caller in this script passes a single character.
#
# Works on a copy, so the caller's string is left untouched.
def gyou(str)
  row = str.dup
  # s/[][]^ -/]/記号/;
  row.sub!(/[^A-Za-zあ-ん0-9]/, '記号')
  row.sub!(/[0-9]/, '数字')
  row.sub!(/[いうえお]/, 'あ')
  row.sub!(/[きくけこ]/, 'か')
  row.sub!(/[しすせそ]/, 'さ')
  row.sub!(/[ちつてと]/, 'た')
  row.sub!(/[にぬねの]/, 'な')
  row.sub!(/[ひふへほ]/, 'は')
  row.sub!(/[みむめも]/, 'ま')
  row.sub!(/[ゆよ]/, 'や')
  row.sub!(/[りるれろ]/, 'ら')
  row.sub!(/[を]/, 'わ')
  row
end

indexfile = ARGV[0]
unless indexfile
  # Without this check the script died with NoMethodError on nil below.
  $stderr.puts "usage: #{File.basename($0)} [-c] [-m makeindex] [-mendex] [-mc code] indexfile"
  exit 1
end

newindexfile = indexfile.sub(/\.\w+$/, '') + ".ind"
tmpfile = "tmptmp"              #"/tmp/mkji$$"
prenkf = "nkf -" + code4ruby[0, 1]
mkidxnkf = "nkf -" + makeindexcode[0, 1]
postnkf = "nkf -" + latexcode[0, 1]

entry_re = /\\indexentry\{(.*)\}\{(\d+)\}/

# Pass 1: read the index file and its kana reading line by line in parallel,
# rewrite each entry as "reading@entry", and pipe the result through the index
# processor into the temporary file.
open("| #{prenkf} #{indexfile}", "r") {|jpindex|
  open("| #{prenkf} #{indexfile} | #{kanafilter}", "r") {|kanaindex|
    open("| #{mkidxnkf} | #{makeindex} | #{postnkf} > #{tmpfile}", "w") {|w|
      while line = jpindex.gets
        # The kana filter may yield fewer lines than the index (for example
        # when it is not installed); treat a missing line as "no reading" so
        # the entry is passed through unchanged instead of crashing.
        kana = kanaindex.gets
        nl = kana ? hira(kana) : ''
        #nl = kanaindex.gets
        if $DEBUG
          STDERR.printf "line=%s\n", line
          STDERR.printf "nl=%s\n", nl
          STDERR.printf "hiranl=%s\n", nl = hira(nl)
        end
        entry, pages = line.scan(entry_re)[0]
        newent = nl.scan(entry_re)[0]
        # Add a sort key only when both lines parsed and the reading (with
        # voicing marks dropped) differs from the entry itself.
        if entry && newent && entry != purify(newent[0])
          w.print "\\indexentry{#{newent[0]}@#{entry}}{#{pages}}\n"
        else
          w.print line
        end
      end
    }
  }
}

# Pass 2: copy the generated index to the final .ind file, inserting a
# "【row】" heading item whenever the row of the first character of an item's
# reading changes, and wrapping items that start with a TeX special character
# in \verb.
lastchr = ''
open("| #{prenkf} #{tmpfile}", "r") {|tread|
  open("| #{kanafilter} < #{tmpfile} | #{prenkf}", "r") {|tmp2|
    open("| #{postnkf} > #{newindexfile}", "w") {|out|
      while line = tread.gets
        hr = tmp2.gets
        if /\\item\s*((\S).*),/ =~ hr
          char = gyou(purify(hira($2.upcase)))
          if char != lastchr
            out.puts "  \\item 【#{char}】"
            lastchr = char
          end
        end
        ## line.sub!(/(\\item\s+)([&%\#~_^{}\$\\].*)/, "\\1\\verb,\\2,")
        line.sub!(/(\\item\s+)([&%\#~_^{}\$].*)/, "\\1\\verb,\\2,")
        out.print line
      end
    }
  }
}

__END__
Below: the jperl version (kakasi only)

#!/usr/local/bin/jperl -Leuc
#
# Convert \indexentry{漢字} into \indexentry{かんじ@漢字} using kakasi,
# then hand the result to makeindex to produce the *.ind file.

$perlcode = "euc";
$makeindexcode = "sjis";
$LaTeXcode = "jis";
$kakasiopt = "-JH -KH";
$makeindex = 'makeindex';

while ($ARGV[0] =~ '^-') {
    $_ = $ARGV[0];
    if (/-[sej]/) {
        $filter = "nkf $_";
    } else {
        $kakasiopt = "$kakasiopt $_";
    }
    shift;
}

$indexfile = $ARGV[0];
$newindexfile = substr($indexfile, 0, rindex($indexfile, '.')) . ".ind";
$tmpfile = "tmptmp";            #"/tmp/mkji$$";
$prenkf = "nkf -" . substr($perlcode, 0, 1);
$kakasiopt = "$kakasiopt -o$perlcode";
$mkidxnkf = "nkf -" . substr($makeindexcode, 0, 1);
$postnkf = "nkf -" . substr($LaTeXcode, 0, 1);

open(JPINDEX, "$prenkf $indexfile|")
    || die "Cannot open index file $indexfile.\n";
open(KANAINDEX, "kakasi $kakasiopt < $indexfile|")
    || die "Cannot exec kakasi.\n";
open(MAKEINDEX, "| $mkidxnkf | $makeindex | $postnkf > $tmpfile")
    || die "Cannot exec $makeindex\n";
select MAKEINDEX;

# Path#1
while (<JPINDEX>) {
    ($entry, $where) = (/\\indexentry{(.*)}{(\d+)}/);
    ($newent) = (<KANAINDEX> =~ /\\indexentry{(.*)}{(\d+)}/);
    $newent = &purify($newent);
    if ($newent ne $entry) {
        print "\\indexentry{$newent\@$entry}{$where}\n";
    } else {
        print;
    }
}
close(JPINDEX);
close(MAKEINDEX);

open(TMPREAD, "$prenkf $tmpfile|") || die "Cannot open temp file.\n";
open(TMPREAD2, "kakasi $kakasiopt <$tmpfile|") || die "Cannot open t2";
open(OUTPUT, "|$postnkf >$newindexfile") || die "Cannout output to file\n";
select OUTPUT;

while ($line = <TMPREAD>) {
    if (<TMPREAD2> =~ /\\item\s+((\S).*),/) {
        $word = $1;
        $char = &gyou(&purify("\U$2"));
        if ($char ne $lastchr) {
            print "  \\item 【$char】\n";
            $lastchr = $char;
        }
    }
    $line =~ s/(\\item\s+)([&%\#~_^{}\$\\].*)/$1\\verb,$2,/;
    print $line;
}
close(TMPREAD);
close(OUTPUT);
#unlink $tmpfile;

sub purify {
    # print STDOUT "hoge=$_[0]\t";
    local($str) = (@_);
    #$_[0] =~
    $str =~
        y/がぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽ/かきくけこさしすせそたちつてとはひふへほはひふへほ/;
    return $str;
}

sub gyou {
    local($str) = (@_);
    $_ = $str;
    # s/[][]^ -/]/記号/;
    s/[^A-Za-zあ-ん0-9]/記号/;
    s/[0-9]/数字/;
    s/[いうえお]/あ/;
    s/[きくけこ]/か/;
    s/[しすせそ]/さ/;
    s/[ちつてと]/た/;
    s/[にぬねの]/な/;
    s/[ひふへほ]/は/;
    s/[みむめも]/ま/;
    s/[ゆよ]/や/;
    s/[りるれろ]/ら/;
    s/[を]/わ/;
    return;
}