#!/usr/bin/perl
# (c) Stas Mishchenkov 2:460/58

use strict;
use warnings;

use File::Spec::Functions;
use Getopt::Long;
use Cwd 'abs_path';


# File-scope variables shared between the option parsing code and the subs
# below (option values, the directory handle and scratch state).
my ( $filename, $text, $rulesdir, $help, $DH, $i );


# Split the absolute path of this script into its directory ($curpath) and
# its file name ($programfile).
# abs_path() returns undef when $0 cannot be resolved; fall back to $0 as given.
my $scriptpath = abs_path($0);
$scriptpath = $0 unless defined $scriptpath;

my ( $curpath, $programfile ) = $scriptpath =~ /^(.*?)[\\\/]([^\\\/]+)$/;

# No directory separator found: treat the script as living in the current
# directory so that neither variable is left undefined.
( $curpath, $programfile ) = ( curdir(), $scriptpath ) unless defined $programfile;


# Read a whole file into memory.
#
# Parameters:
#   $filename - path of the file to read.
#
# Returns the file contents as a string; an empty file yields ''.
# On failure a message is printed to STDERR and 0 is returned.
sub readfile($)
{
	my ( $filename ) = @_;

	my $HANDLE;
	unless ( open( $HANDLE, '<', $filename ) ) {
		print STDERR "Can't open $filename ($!).\n";
		return 0;
	}

	# Slurp mode: with $/ undefined a single readline returns everything up
	# to end of file, and '' (not undef) for an empty file, so an empty file
	# is no longer mistaken for a read error.
	my $buff = do { local $/; <$HANDLE> };

	unless ( defined $buff ) {
		print STDERR "Can't read $filename ($!).\n";
		close( $HANDLE );
		return 0;
	}

	close( $HANDLE );
	return $buff;
}

# Print the help text on STDOUT and terminate the program.
# Takes no arguments and does not return.
sub usage()
{
	# One element per output line; empty elements produce the blank lines.
	my @lines = (
		'This program is designed for checking text files in the CP866 encoding',
		'for the presence of substitution of Russian letters for Latin letters',
		'similar in style.',
		'',
		"Usage: $programfile <options>",
		'',
		'Options are:  --file|-f <filename>     - filename to test.',
		'              --dir|-d  <dirname>      - dir name to test all files in.',
		'',
	);

	print join( "\n", @lines ) . "\n";
	exit;
}

# Scan the file named by the file-scope variable $filename and report every
# whitespace-separated word that contains both a CP866 Russian letter and a
# Latin letter.
#
# Takes no arguments. A $filename without any directory separator is looked
# up in the script's directory ($curpath), and $filename is updated to that
# resolved path.
#
# For each suspicious word one line is printed on STDOUT: the line number,
# the first run of Latin letters in the word, the hex code of that run's
# first character, and the whole word in parentheses.
sub testfile()
{
	# CP866 byte values of Russian letters, written as \x escapes so the
	# pattern does not depend on the encoding this source file is saved in:
	#   \x80-\xAF  capital letters and the first half of the small letters
	#   \xE0-\xEF  the remaining small letters
	#   \xF0-\xF1  capital and small IO
	# The box-drawing bytes \xB0-\xDF in between are left out on purpose:
	# a table border next to a Latin word is not a letter substitution.
	my $cyrillic = qr/[\x80-\xAF\xE0-\xF1]/;

	$filename = catfile( $curpath, $filename ) if $filename !~ /[\\\/]/;

	# readfile() prints its own error message when the file cannot be read.
	my $content = readfile( $filename );
	return if !defined $content || $content eq '';

	my $lineno = 0;
	foreach my $line ( split( /\n/, $content ) ) {
		$lineno++;
		foreach my $word ( split( /\s/, $line ) ) {
			next unless $word =~ $cyrillic;
			next unless $word =~ /([a-z]+)/i;

			# Copy the capture right away; $1 is only valid until the next match.
			my $latin = $1;
			printf "Line %4d: '%s' 0x%2x (%s)\n",
				$lineno, $latin, unpack( "C", $latin ), $word;
		}
	}
}

  # Parse the command line. Getopt::Long prints its own diagnostics for
  # unknown or malformed options before we die here.
  GetOptions (
		"help"   => \$help,
		"dir=s"  => \$rulesdir,
		"file=s" => \$filename
	    )
  or die("Error in command line arguments\n");

	# Show the help text when asked for it, or when there is nothing to check.
	usage() if $help;
	usage() if !defined( $rulesdir ) && !defined( $filename );

	# Conflicting options are an error: report on STDERR, exit non-zero.
	if ( defined( $rulesdir ) && defined( $filename ) ) {
		print STDERR "--file and --dir are mutually exclusive options.\n";
		exit 1;
	}

	# Single-file mode.
	if ( defined( $filename ) ) {
		testfile();
		exit;
	}

	# Directory mode: check every non-hidden, non-directory entry of $rulesdir.
	unless( opendir( $DH, $rulesdir ) ) {
		print STDERR "Can't open $rulesdir ($!).\n";
		exit 1;
	}

	# Use a named lexical with an explicit defined() test instead of relying
	# on the implicit $_ assignment of "while (readdir ...)", which only
	# exists since Perl 5.12 and leaves a global $_ live across testfile().
	while ( defined( my $entry = readdir( $DH ) ) ) {
		next if $entry =~ /^\./;
		$filename = catfile( $rulesdir, $entry );
		next if -d $filename;
		print "\n\'$entry\':\n";
		testfile();
	}

	closedir( $DH );
