[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Forward: Re: CVS commit: htdocs
>  > スクリプトが直接修正するのは恐いので、こないだのようなものを
>  > www-changes-jaに流すとよいかなと思っています。
> これは
>  > /ja/Changes/index.html
>  > 	../MailingLists/ should be /MailingLists/
> だと思ってるんですが、
そうですそうです。
> これは翻訳ファイルが追加された時には
> 
> /MailingLists/ should be ../MailingLists/
> 
> とか出力してくれるんでしょうか? 
そのはずだったんですが、今試しに
$ mkdir /ja/MailingLists/; touch /ja/MailingLists/index.html
$ ./urlcheck.pl
してみたら何も表示されませんでした…。あー、スクリプト間違ってる。
ということで直したやつで実験したら、こういうのが表示されました。
$ rm -rf /ja/MailingLists
$ time perl urlcheck.pl 
/ja/Documentation/network/index.html
        /Misc/feedback.html should be /ja/Misc/feedback.html
/ja/index.html
        ../index.html should be /ja/index.html
real    1m3.963s
user    0m50.590s
sys     0m2.052s
$ 
ちゃんと表示してるみたいです。/ja/index.htmlのほうはそのままの
ほうがいいので、これは出ないようにしますけど。
チェックに使うつもりのurlcheck.plと、昨日使ったurlconv.plを
添付しておきます。urlcheck.plをhtdocs/直下で使えば、
上記のような結果が得られると思います。
-- 
iかわもと よしひさ!                   kawamoto@es.osaka-u.ac.jp
#!/usr/pkg/bin/perl
use File::Find;
use HTML::Parse;
use HTML::Element;
%jafiles = ();
find(\&wanted, "ja");
for $jafile (sort(keys %jafiles)) {
    $msg = '';
    $parsed = HTML::Parse::parse_htmlfile("./$jafile");
    for (@{ $parsed->extract_links() }) {
	$_ = $_->[0];
	$url = $_;
	if (!/^\w*:/ && !/^$/ && !/^#/ && /^(.*\/)?([^\/#]*)(#.*)?$/) {
	    $dir = $1;
	    $file = $2;
	    $mark = $3;
	    if (/^\//) {
		$_ = "$dir$file";
	    } else {
		$_ = $jafile;
		s/\/[^\/]+$/\/$dir$file/;
	    }
	    s/\/\.$/\//;
	    s/(\/\.\.?)$/$1\//;
	    1 while (s/\/(?!\.\.\/)[^\/]+\/\.\.\//\//);
	    s/\/$/\/index.html/;
	    if (/^\/ja\//) {
		if ($jafiles{$_}) {
		    # The link seems to point existing file.
		    # This is OK.
		} else {
		    # The link points non existent file.
		    # This is NG.
		    s/^\/ja//;
		    s/\/[^\/]+$/\/$file$mark/;
		    $msg .= "\t$url should be $_\n";
		}
	    } elsif (/^\/[^\/\.]+\.[^\/]+\//) {
		# This is probably outside link.
		# This is NG.
		$msg .= "\t$url should be ../$url\n";
	    } else {
		# This is local link, maybe.
		if ($jafiles{"/ja$_"}) {
		    # There is a translated file.
		    # This is NG.
		    s/\/[^\/]+$/\/$file$mark/;
		    $msg .= "\t$url should be /ja$_\n";
		} else {
		    # There are no traslated files.
		    # This is OK, or a wrong link.
		}
	    }
	}
    }
    print "$jafile\n$msg" if ($msg);
}
sub wanted {
    $jafiles{"/$File::Find::dir/$_"} = 1 if (-f && /\.html$/);
}
#!/usr/pkg/bin/perl
use File::Find;
use File::Path;
use File::Basename;
use HTML::Parse;
use HTML::Element;
%jafiles = ();
find(\&wanted, "ja");
for $jafile (sort(keys %jafiles)) {
    @html = ();
    open(HTML, "./$jafile") || next;
    @html = <HTML>;
    close(HTML);
    $parsed = HTML::Parse::parse_html(join('', @html));
    $msg = '';
    @urls = ();
    %newurl = ();
    for (@{ $parsed->extract_links() }) {
	$_ = $_->[0];
	$url = $_;
	if (!/^\w*:/ && !/^$/ && !/^#/ && /^(.*\/)?([^\/#]*)(#.*)?$/) {
	    $dir = $1;
	    $file = $2;
	    $mark = $3;
	    if (/^\//) {
		$_ = "$dir$file";
	    } else {
		$_ = $jafile;
		s/\/[^\/]+$/\/$dir$file/;
	    }
	    s/\/\.$/\//;
	    s/(\/\.\.?)$/$1\//;
	    1 while (s/\/(?!\.\.\/)[^\/]+\/\.\.\//\//);
	    s/\/$/\/index.html/;
	    if (/^\/ja\//) {
		if ($jafiles{$_}) {
		    # The link seems to point existing file.
		    # This is OK.
		} else {
		    # The link points non existent file.
		    # This is NG.
		    s/^\/ja//;
		    s/\/[^\/]+$/\/$file$mark/;
		    $msg .= "\t$url should be $_\n";
		    push(@urls, $url);
		    $newurl{$url} = $_;
		}
	    } elsif (/^\/[^\/\.]+\.[^\/]+\//) {
		# This is probably outside link.
		# This is NG.
		$msg .= "\t$url should be ../$url\n";
		push(@urls, $url);
		$newurl{$url} = "../$url";
	    } else {
		# This is local link, maybe.
		if ($jafiles{"/ja$_"}) {
		    # There is a translated file.
		    # This is NG.
		    s/\/[^\/]+$/\/$file$mark/;
		    $msg .= "\t$url should be /ja$_\n";
		    push(@urls, $url);
		    $newurl{$url} = "/ja$_";
		} else {
		    # There are no traslated files.
		    # This is OK, or a wrong link.
		}
	    }
	}
    }
    #print "$jafile\n$msg" if ($msg);
    print "$jafile\n" if ($msg);
    &mkfile($jafile, @urls) if (@urls);
}
sub wanted {
    $jafiles{"/$File::Find::dir/$_"} = 1
	if (-f && (/\.html$/ || /\.list$/ ||
		   /^index.faq$/ || /^post$/ || /^pre$/));
}
sub mkfile {
    my ($jafile, @urls) = @_;
    $jafile =~ s<^/ja><./ja-new>;
    mkpath(dirname($jafile));
    open(NEW, "> $jafile") || return;
    for (@html) {
	shift @urls if (s/(["']\Q$urls[0]\E['"])/"$newurl{$urls[0]}" origlink=$1/);
	print NEW "$_";
    }
    close(NEW);
    print "*** WARNING ***: some links are not found: @urls\n" if (@urls);
}