RD2HTML for Perl
#!/usr/local/bin/perl # ---------------------------------------------------------------------------- # =head1 experiment --- RD Parser a experiment for RD Parser =cut # ---------------------------------------------------------------------------- # use strict; use vars qw(%CF); package RD; sub main{ my$rdStyle='file:///D:/Web/Airemix/naruse/notes/readme.css'; my$rddoc; open(IN,'<'.$::CF{'RDParser'})||die sprintf'Cannnot open %s.',$::CF{'RDParser'}; read(IN,$rddoc,-s$::CF{'RDParser'}); close(IN); my$rd=new RD; $rd->load($rddoc); # use Data::Dumper;print euc2sjis(Dumper($rd->{objRD}));exit; my$output=$rd->toString('HTML',stylesheet=>$rdStyle); print euc2sjis($output); exit; } sub euc2sjis{ my$s=shift; $s=~s{([\xa1-\xfe]{2})|\x8e([\xa1-\xdf])|\x8f[\xa1-\xfe]{2}} { if($1){ my($c1,$c2)=unpack('C2',$1); if($c1%2){ pack('C2',($c1>>1)+($c1<0xdf?0x31:0x71),$c2-0x60-($c2<0xe0)); }else{ pack('C2',($c1>>1)+($c1<0xdf?0x30:0x70),$c2-2); } }elsif($2){#SS2 $2; }else{#SS3 "\x81\xac"; } }ego; return$s; } #------------------------------------------------------------------------------# {package RD::Generator; # public RD::Generator->new( $generatorName ) sub new{ my$class=shift; my$self={}; $class='RD::Generator::'.shift if@_; bless$self,$class; } # public $generator->generate( $rd ) sub generate{ my$self=shift; my$rd=shift; return undef; } } #------------------------------------------------------------------------------# {package RD::Generator::HTML; # public RD::Generator::HTML->new( $generator ) sub new{ my$class=shift; return'RD::Generator::'.shift if@_; } # public $generator->generate( $rd->{objRD} [, [key => value] ..] ) sub generate{ die if@_<2; my($class,$self)=ref$_[0]?(ref$_[0],shift):($_[0],bless({},shift)); my$rd=shift; my%option=@_; $option{'stylesheet'}='style.css'unless$option{'stylesheet'}; ref$rd&&ref$rd eq'ARRAY'||die'$rd must be ARRAY reference.'; my$result=sprintf<<'_HERE_',$option{'stylesheet'},_getTitle($rd)||'RD';
: %s :
_HERE_ $result.=$self->_block($rd); if(@{$self->{'footnote'}}){ my$footnote=sprintf"
%s
\n",$self->_termLabel('脚注'); $footnote.=sprintf qq(
*%d
%s
\n) ,$_+1,$_+1,$_+1,$self->{'footnote'}->[$_]for 0..$#{$self->{'footnote'}}; $result=~s/<\?com\.airemix\.naruse\.perl\.rd footnote\?>/$footnote/o; } if($self->{'headlines'}){ my$headlines=sprintf"
%s
\n
\n%s
",$self->_termLabel('目次'),$self->{'headlines'}; $result=~s/<\?com\.airemix\.naruse\.perl\.rd index\?>/$headlines/o; } $result.=<<'_HERE_'; _HERE_ return$result; } # private _getTitle( $rd ) sub _getTitle{ my$rd=shift; ref$rd||return; my$title=undef; for(@$rd){ if($_->{'name'}eq'headline'&&$_->{'level'}==1){ $title=_getPlain($_->{'content'}); last; }elsif(ref($_->{'content'})){ $title=_getTitle($_->{'content'}); $title&&last; } } return$title; } # private $generator->_block( $rd ) sub _block{ my$self=shift; my$rd=shift; $self->{'footnote'} =[]unless$self->{'footnote'}; $self->{'headlines'}=''unless exists$self->{'headlines'}; $self->{'indent'}=exists$self->{'indent'}?$self->{'indent'}+1:0; my$space=' 'x 2; my$result=''; for(@$rd){ if('pi'eq$_->{name}){ #Processor Instruction if('index'eq$_->{content}){ $result.="\n"; }elsif('footnote'eq$_->{content}){ $result.="\n"; } }elsif('headline'eq$_->{name}){ # Headline $result.=sprintf"
%s
\n",$_->{level},$self->_termLabel($_->{content}),$_->{level}; $self->{'headlines'}.=sprintf"%s %s\n",'='x$_->{level},_getPlain($_->{content}); }elsif('textblock'eq$_->{name}){ # TextBlock $result.=sprintf"%s
%s
\n",$space x$self->{'indent'},$self->_inline($_->{content}); }elsif('itemlist'eq$_->{name}or'enumlist'eq$_->{name}){ # ItemList or EnumList my$tagName='itemlist'eq$_->{name}?'UL':'OL'; $result.=sprintf"%s<%s>\n",$space x$self->{'indent'},$tagName; ++$self->{'indent'}; for(@{$_->{content}}){ # if(@{$_->{content}}==1&&$_->{content}->[0]->{name}eq'textblock'){ # $result.=sprintf"%s
%s
\n" # ,$space x$self->{'indent'},$self->_inline($_->{content}->[0]->{content}); # }else{ $result.=sprintf"%s
\n%s%s
\n" ,$space x$self->{'indent'},$self->_block($_->{content}),$space x$self->{'indent'}; # } } --$self->{'indent'}; $result.=sprintf"%s%s>\n",$space x$self->{'indent'},$tagName; }elsif('desclist'eq$_->{name}or'methodlist'eq$_->{name}){ # DescList or MethodList $result.=sprintf"%s<%s>\n",$space x$self->{'indent'},'DL'; ++$self->{'indent'}; for(@{$_->{content}}){ my$term=shift@{$_->{content}}; my$desc=shift@{$_->{content}}; $result.=sprintf"%s
%s
\n",$space x$self->{'indent'},'methodlistitem'eq$_->{'name'} ?$self->_methodLabel($term->{'content'}):$self->_termLabel($term->{'content'}); # if(@{$desc->{content}}==1&&$desc->{content}->[0]->{name}eq'textblock'){ # $result.=sprintf"%s
%s
\n" # ,$space x$self->{'indent'},$self->_inline($desc->{content}->[0]->{content}); # }else{ $result.=sprintf"%s
\n%s%s
\n" ,$space x$self->{'indent'},$self->_block($desc->{content}),$space x$self->{'indent'}; # } } --$self->{'indent'}; $result.=sprintf"%s%s>\n",$space x$self->{'indent'},'DL'; }elsif('verbatim'eq$_->{name}){ # Verbatim $result.=sprintf"%s
\n%s%s
\n" ,$space x$self->{'indent'},_escapeHTML($_->{content}),$space x$self->{'indent'}; } } --$self->{'indent'}if$self->{'indent'}; return$result; } # private $generator->_inline( $line ) sub _inline{ my$self=shift; my$line=shift; return _escapeHTML($line)unless ref$line; my$result=''; for(@$line){ if('text'eq$_->{'name'}){ $result.=_escapeHTML($_->{'content'}); }elsif('emphasis'eq$_->{'name'}){ $result.=sprintf'
%s
',$self->_inline($_->{'content'}); }elsif('code'eq$_->{'name'}){ $result.=sprintf'
%s
',$self->_inline($_->{'content'}); }elsif('var'eq$_->{'name'}){ $result.=sprintf'
%s
',$self->_inline($_->{'content'}); }elsif('keyboard'eq$_->{'name'}){ $result.=sprintf'
%s
',$self->_inline($_->{'content'}); }elsif('term'eq$_->{'name'}){ $result.=$self->_termLabel($_->{'content'}); }elsif('reference'eq$_->{'name'}){ $result.=sprintf'
%s
' ,'uri'eq$_->{'type'}?'':'#',_escapeHTML($_->{'label'}),$self->_inline($_->{'content'}); }elsif('footnote'eq$_->{'name'}){ push@{$self->{'footnote'}},$self->_inline($_->{'content'}); my$num=scalar@{$self->{'footnote'}}; $result.=sprintf'
*%d
',$num,$num,$num; } } return$result; } # private $generator->_termLabel( $line ) sub _termLabel{ my$self=shift; my$line=shift; my$plain=_getPlain($line); my$label=$plain; my$display=$self->_inline($line); return $self->_label($label,$display); } # private $generator->_methodLabel( $line ) sub _methodLabel{ my$self=shift; my$line=shift; $line=~/\s*([^\x28{]*[^\s\x28{])/o; my$label=$1; my$display=$line; return $self->_label($label,$display); } # private $generator->_label( $label, $display ) sub _label{ my$self=shift; my$label=shift; my$display=shift; $self->{'label'}={}unless$self->{'label'}; my$i=0; my$base=$label; while(exists$self->{'label'}->{$label}){ $label=sprintf'%s:%d',$base,$i; } $self->{'label'}->{$label}=1; return sprintf'
%s
',_escapeHTML($label),_escapeHTML($display); } # private _getPlain( $line ) sub _getPlain{ my$line=shift; return$line unless ref$line; my$result=''; for(@{$line}){ $result.=_getPlain($_->{'content'}); } return$result; } # private _escapeHTML( $str ) sub _escapeHTML{ my$str=shift; $str=~s/&/&/go; $str=~s/"/"/go; $str=~s/</go; $str=~s/>/>/go; return $str; } } #------------------------------------------------------------------------------# =head2 RD Parser RDを解析します。 =cut {package RD::Parser; # public RD::Parser->new() sub new{ my$class=shift; my$self={}; return bless$self,$class; } # private parse( $document ) sub parse{ my$class=shift; my$tab=' ' x 4; my$document=[map{my$line=$_;$line=~s/\G(\x20*)\t/$1$tab/go;$line}split("\n",shift||'')]; my@result; while(@$document){ shift(@$document)=~/^=begin\b/o||next; push@result,_parseBlock($document); } return\@result; } # private _parseBlock( $document ) sub _parseBlock{ my$document=shift; my@result; while(@$document){ my$line=$document->[0]; if($line=~/^=end\b/o){ shift@$document; last; }elsif($line=~/(#\?[^?]*\?+(?:[^#?][^?]*\?+)*#)/o){ # PI $1=~/#\?(.*)\?#/o; push@result,{name=>'pi',content=>$1}; shift@$document; # /^\x28\x28\?[^\?]*\?+(?:(?:\x29|\x29?[^\x29\?][^\?])\?+)*\x29\x29$/o }elsif($line=~/^#/o){ # Comment shift@$document; }elsif($line=~/^(={1,4})\s+(.*\S)/o){ # Headline 1-4 push@result,{name=>'headline',level=>length$1,content=>_parseInline($2)}; shift@$document; }elsif($line=~/^(\+{1,2})\s+(.*\S)/o){ # Headline 5-6 push@result,{name=>'headline',level=>4+length$1,content=>_parseInline($2)}; shift@$document; }elsif($line=~/^<<<\s*(.*\S)/o){ # Include push@result,{name=>'include',content=>$1}; shift@$document; }elsif($line=~/^\s*$/o){ # WHITELINE shift@$document; }elsif($line=~/^\s+(?![\*:])(?!\(\d+\))\S/o){ # Verbatim push@result,_verbatim($document); }elsif($line=~/^\s*\*/o){ # ItemList push@result,_itemlist($document); }elsif($line=~/^\(\d*\)/o){ # EnumList push@result,_enumlist($document); }elsif($line=~/^\s*:/o){ # DescList push@result,_desclist($document); }elsif($line=~/^\s*\-\-\-/o){ # MethodList push@result,_methodlist($document); }elsif($line=~/^(.*\S)/o){ # TextBlock push@result,_textblock($document); }else{ # shift@$document; } } return@result; } # private _textblock( $document ) sub _textblock{ my$document=shift; shift(@$document)=~/^(\s*(?:(?:[\*:]|\(\d+\))\s*)?)(.*\S)/o; my$baseline=length$1; my$result=$2; while(@$document){ $document->[0]=~/^#/o&&shift@$document&&next; $document->[0]=~/^[=+]/o&&last; $document->[0]=~/^\s{$baseline}(\s*(?:(?:[\*:]|\(\d+\))\s*)?)(\S.*\S|\S)/&&!$1||last; $result.=$2; shift@$document; } return{name=>'textblock',content=>_parseInline($result)}; } # private _verbatim( $document ) sub _verbatim{ my$document=shift; shift(@$document)=~/^(\s+)(.*\S)/o; my$baseline=length$1; my$result=$2; while(@$document){ $document->[0]=~/^#/o&&shift@$document&&next; $document->[0]=~/^\s{$baseline}(.*\S|\s*)/||last; $result.="\n".$1; shift@$document; } return{name=>'verbatim',content=>$result}; } # private _itemlist( $document ) sub _itemlist{ return __list(shift,{parent=>'itemlist',child=>'listitem',regex=>'\*'}); } # private _enumlist( $document ) sub _enumlist{ return __list(shift,{parent=>'enumlist',child=>'enumlistitem',regex=>'\(\d+\)'}); } # private __list( $document, $option ) sub __list{ my$document=shift; my$option=shift; $document->[0]=~/^(\s*)$option->{regex}/||die; my$baseline=length$1; my@result; while(@$document){ $document->[0]=~/^#/o&&shift@$document&&next; $document->[0]=~/^\s{$baseline}$option->{regex}/||last; $document->[0]=~/^(\s*$option->{regex}\s*)(\S?)/; my@item; my$baseline=length$1; if($2){ push@item,_textblock($document); }else{ shift@$document; } while(@$document){ $document->[0]=~/^#/o&&shift@$document&&next; $document->[0]=~/^\s{$baseline}(\s*(?:[\*:]|\(\d+\))\s*)?(\S?)/||last; if($1){ $1=~/(\S)/o; if('*'eq$1){ push@item,_itemlist($document); }elsif('('eq$1){ push@item,_enumlist($document); }elsif(':'eq$1){ push@item,_desclist($document); }else{ push@item,_verbatim($document); } }elsif($2){ push@item,_textblock($document); }else{ shift@$document; } } push@result,{name=>$option->{child},content=>\@item}; } return{name=>$option->{parent},content=>\@result}; } # private _desclist( $document ) sub _desclist{ return __dlist(shift,{parent=>'desclist',child=>'desclistitem',regex=>':'}); } # private _methodlist( $document ) sub _methodlist{ return __dlist(shift,{parent=>'methodlist',child=>'methodlistitem',regex=>'\-\-\-'}); } # private _dlist( $document, $option ) sub __dlist{ my$document=shift; my$option=shift; my@result; $document->[0]=~/^(\s*)$option->{regex}/||die; my$baseline=length$1; while(@$document){ $document->[0]=~/^#/o&&shift@$document&&next; $document->[0]=~/^(\s{$baseline}$option->{regex}\s*)(.*)/||last; my$base=length$1; $2=~/(\S.*\S|\S)/o||die'no Term Part'; my@item={name=>'term',content=>$option->{'parent'}eq'methodlist'?$1:_parseInline($1)}; shift@$document; shift@$document while$document->[0]=~/^#/o; $document->[0]=~/^(\s*)/o; my$baseline=length$1; $baseline>=$base or die'DescriptionパートのBaselineはTermパートのテキスト部分と同じかより深くないといけません。'; my@description; while(@$document){ $document->[0]=~/^#/o&&shift@$document&&next; $document->[0]=~/^\s{$baseline}(\s*(?:[\*:]|\(\d+\))\s*)?(\S?)/||last; if($1){ $1=~/^(\S)/o; if('*'eq$1){ push@description,_itemlist($document); }elsif('('eq$1){ push@description,_enumlist($document); }elsif(':'eq$1){ push@description,_desclist($document); }else{ push@description,_verbatim($document); } }elsif($2){ push@description,_textblock($document); }else{ shift@$document; } } push@item,{name=>'description',content=>\@description}; push@result,{name=>$option->{child},content=>\@item}; } return{name=>$option->{parent},content=>\@result}; } # private _parseInline( $str ) sub _parseInline{ my$str=shift; return[{name=>'text',content=>$str}]unless$str=~/[\x28\x29]{2}/o; my@line= map{/\x28\x28([^\x28])/o?('((',$1) : /([^\x29])\x29\x29/o?('))',$1) : $_} grep{$_&&length$_}$str=~/(.*?)(\x28\x28[^\x28]|[^\x29]\x29\x29|$)/go; return _parseInlineArray(\@line); } # private _parseInlineArray( $inline [,$option] ) sub _parseInlineArray{ my$line=shift; my$option=@_?shift:{}; my$regexInline=qr/[-*\{|%]/; my%inline=( '*'=>{name=>'emphasis', close=>'*'}, '{'=>{name=>'code', close=>'}'}, '|'=>{name=>'var', close=>'|'}, '%'=>{name=>'keyboard', close=>'%'}, ':'=>{name=>'term', close=>':'}, '<'=>{name=>'reference',close=>'>'}, '-'=>{name=>'footnote', close=>'-'}, # "'"=>{name=>'verbatim', close=>"'"}, '?'=>{name=>'pi', close=>'?'}, ); my@result; my$tmp=''; while(@$line){ my$fragment=shift@$line; if(!defined$fragment){ die'broken inline'; }elsif('))'eq$fragment){ my$fragment=shift@$line; if(!defined$fragment){ die'broken inline'; }elsif($option->{'close'}&&$fragment eq$option->{'close'}){ last; }else{ $tmp.=$fragment.'))'; } }elsif('(('eq$fragment){ $fragment=shift@$line; if(!defined$fragment){ die'broken inline'; }elsif("'"eq$fragment){ # Verbatim while(@$line){ my$fragment=shift@$line; if(!defined$fragment){ die'broken inline'; }elsif('))'eq$fragment){ my$fragment=shift@$line; if(!defined$fragment){ die'broken inline'; }elsif("'"eq$fragment){ last; }else{ $tmp.=$fragment.'))'; } }else{ $tmp.=$fragment; } } }elsif(!$option->{'isAnchor'}&&$fragment=~/[:<]/o){ # Label or Reference push@result,{name=>'text',content=>$tmp}if$tmp; $tmp=''; if('<'eq$fragment){ push@result,_reference($line); }else{ push@result,{name=>$inline{$fragment}->{'name'} ,content=>_parseInlineArray($line,{close=>$inline{$fragment}->{'close'},isAnchor=>1})}; } }elsif($fragment=~/$regexInline/o){ push@result,{name=>'text',content=>$tmp}if$tmp; $tmp=''; push@result,{name=>$inline{$fragment}->{'name'} ,content=>_parseInlineArray($line,{close=>$inline{$fragment}->{'close'}})}; }else{ $tmp.='(('.$fragment; } }else{ $tmp.=$fragment; } } push@result,{name=>'text',content=>$tmp}if$tmp; return\@result; } # private _reference( $line [,$option] ) sub _reference{ my$line=shift; my$option=@_?shift:{}; my$reference; while(@$line){ my$fragment=shift@$line; if(!defined$fragment){ die'broken inline'; }elsif('))'eq$fragment){ $fragment=shift@$line; if(!defined$fragment){ die'broken inline'; }elsif('>'eq$fragment){ last; }else{ $reference.=$fragment.'))'; } }else{ $reference.=$fragment; } } $reference=~/(?:\s*(?:([^"'|\/]+)|"([^"\\]*(?:\\.[^"\\]*)*)"|'([^'\\]*(?:\\.[^'\\]*)*)')\s*\|)?(.+)/o or return''; my$display=$1||$2||$3; my$label=$4; my$type; if($label=~s/^UR[LI]:(.+)/$1/o){ $display=$1 unless defined$display; $type='uri'; }elsif(!$display){ $display=$label; $label=_getPlain(_parseInline($label)); $type='term'; }else{ $display=~s/\\(.)/$1/go; $type='term'; } return{name=>'reference',label=>$label,type=>$type,content=>_parseInline($display)}; } # private _getPlain( $line ) sub _getPlain{ my$line=shift; return$line unless ref$line; my$result=''; $result.=_getPlain($_->{'content'})for@{$line}; return$result; } } #------------------------------------------------------------------------------# =head2 RD RDを処理します。 =cut { package RD; my%toString; # public RD->new( [$document] ) sub new{ my$class=shift; my$document=@_?shift:''; my$self={}; bless$self,$class; $self->load($document); return$self; } # public $rd->load( $document ) sub load{ my$self=shift; my$document=shift; $self->{document}=$document; return $self->{objRD}=RD::Parser->parse($document); } # public $rd->toString( $type [, [key => value] ..] ) sub toString{ my$self=shift; my$type=shift; my$generator=new RD::Generator($type); return$generator->generate($self->{objRD},@_); } } #------------------------------------------------------------------------------# &main if$::CF{'program'}eq __FILE__; #------------------------------------------------- # 初期設定 # package main; BEGIN{ $CF{'encoding'}||='euc-jp'; unless($CF{'program'}){ $CF{'program'}=__FILE__; $SIG{'__DIE__'}=sub{print ::euc2sjis(@_?"$_[0]":'ERROR');exit}; } $CF{'RDParser'}=__FILE__; } 1; __END__ =begin #------------------------------------------------------------------------------# = おまけ〜RD2XML Test Suite RD解析のテストケースなのです。 #?index?# #------------------------------------------------------------------------------# == Baseline |この行はトップレベルのTextBlockの行だとします。 |<- したがって、Baselineは左端です。 *|List内では (1) |<- このようにBaselineは(1)の行で決定されます。 * |同じListでもListItem毎にBaselineが決定されます。 |<- したがって、1番目のListItemとは違うここにBaselineがあります。 == Headline = Headline 1. === Headline 1.1.1. + Headline 1.1.1.1.1. #------------------------------------------------------------------------------# == TextBlock これはTextBlockです。 TextBlockの2行目の行です。 この行はTextBlockでなくVerbatimです。 * そしてこの行はListの行です。((-正確にはListItemの中のTextBlockの行でも あるのですが-)) #------------------------------------------------------------------------------# == Verbatim これはVerbatimです。 最初の行より深いインデントを持っても、同じVerbatimの行になります。 * この行はListに見えますが、Verbatimです。 しかしこの行は最初の行よりも浅くインデントされているので、別のVerbatim #comment の行になります。 #------------------------------------------------------------------------------# == Item List * 親Listの最初のItem * 子Listの最初のItem * 孫Listの最初のItem * 孫Listの2番目のItem * 曾孫Listの最初のItem #comment * 曾孫Listの2番目のItem * 玄孫Listの最初のItem * 子Listの2番目のItem 親ListのItemに含まれるTextBlock #------------------------------------------------------------------------------# == Enum List (1) 親Listの最初のItem * 子ListとなるItemList (2) 親Listの2番目のItem #comment (10) 番号は無視されます。 #------------------------------------------------------------------------------# == Desc List :Term Descriptionの最初の行 2番目の行 :Term 2 #comment * aaa #comment * Listも含む事ができます * ... :Identity or URL らべる #------------------------------------------------------------------------------# == Method List --- Array#each {|i| ... } # => Labelは"Array#each" 各項目に対してブロックを評価する。 --- Array#index(val) # => Labelは"Array#index" ((|val|))と同じ値である最初の項目を返す。同じ項目が無いときには (({nil}))を返す。 ((*Em*)) (({while gets...})) ((|var|)) ((%ruby -v%)) ((:Term:)) ((
)) ((-Footnote-)) (('v((*er*))b')) #comment ((<((*目次*))>)) #?footnote?# == Copyright Copyright (c) 2001-2003 NARUSE,Yui (((
))). All rights reserved. =end =head2 COPYRIGHT Copyright (c) 2001-2003 NARUSE,Yui (((
))). No rights reserved. =cut