A Perl script to clean stray ";" from konqueror-embedded-snapshot-20030705

Wed Aug 31 23:19:08 CEST 2005

While porting konqueror-embedded-snapshot-20030705 using the
arm-linux-toolchain-bin-12-15-04-driscoll.tar.gz toolchain,
I ran into incompatibility problems - mostly a stray ";" after the
closing brace of a namespace, like

namespace XXX{
....
};

I got bored cleaning them manually and wrote a Perl script.
Maybe it can be beneficial for others on the list.
I guess it can be modified to clean ";" from function definitions
like
AAA BBB(CCCC){
};
I never did that. There were not that many cases, so I just fixed them
manually.
I am sure there are better parsers available.

Alex

======================================================

#!/usr/bin/perl -w
use strict;
use warnings;
# Verbosity level read by the subs below as $main::debug: values above 1
# make ParseTree print each directory it enters and cleansemifile print
# each file it rewrites.  A plain assignment is used because `local` is
# only meant for temporarily overriding a global inside a dynamic scope.
$main::debug = 2;

#cleaning `;' from
#namespace{ ...};
#USING: perl cleansemicolon.pl

# modified from
# http://vipe.technion.ac.il/~shlomif/lecture/Perl/Newbies/lecture2/functions/
sub min
{
    # Returns the numerically smallest of the arguments.
    # With no arguments the result is undef.
    my @candidates = @_;
    my $smallest   = $candidates[0];

    for my $value (@candidates) {
        next unless $value < $smallest;
        $smallest = $value;
    }

    return $smallest;
}
#------------------------------------------------------------------------
# Parse tree and collect all Files
# modified from www.pohlheim.com/perl/replacetree98.bat
#
# ParseTree($dirname, $patternfiles)
#   $dirname      - directory to walk; sub-directories are visited
#                   recursively.
#   $patternfiles - regular expression (as a string) tested against each
#                   plain file's name (not its full path); matching files
#                   are handed to cleansemifile().
# Dies if a directory cannot be opened.  Returns 1.
#------------------------------------------------------------------------
sub ParseTree
{
    my ($dirname, $patternfiles) = @_;

    opendir(my $dirhandle, $dirname)
        or die "ParseTree - Can't open directory $dirname: $!";
    if ($main::debug > 1) { print "\nDirectory: $dirname\n"; }

    # Read the whole listing up front and release the handle before
    # recursing, so a deep tree does not keep one handle open per level.
    my @entries = readdir($dirhandle);
    closedir($dirhandle);

    foreach my $file (@entries) {
        # exclude . and .. directories
        next if $file eq '.' or $file eq '..';

        my $filepath = $dirname . '/' . $file;

        # Entries that no longer exist (e.g. dangling symlinks) are
        # skipped silently.
        next unless -e $filepath;

        if (-d $filepath) {
            # if it's a directory, call this function recursively
            ParseTree($filepath, $patternfiles);
        }
        elsif (-f $filepath) {
            # if it's a file - test for filepattern and clean it
            cleansemifile($filepath) if $file =~ /$patternfiles/;
        }
        else {
            print "Unknown type of file in $dirname: $file\n";
        }
    }

    return 1;
}
# modified from www.pohlheim.com/perl/replacetree98.bat
#
# cleansemifile($fname)
#   Reads the file $fname, passes its whole content through
#   cleansemistring() and, only when that changed something, replaces the
#   file with the cleaned text.  The new text is first written to
#   "${fname}_out"; the original is parked as "${fname}_tmp" while the new
#   file is moved into place and is removed afterwards.
#   Dies when the file cannot be read, written or renamed.
sub cleansemifile {
    my $fname = $_[0];

    open(my $in, '<', $fname) or die("Can't open file: $fname: $!\n");
    my $filestring = do { local $/; <$in> };    # slurp the whole file
    close($in);
    $filestring = '' unless defined $filestring;

    my $newfilestring = cleansemistring($filestring);
    return if $newfilestring eq $filestring;

    print("$fname\n") if ($main::debug > 1);

    my $outname = "${fname}_out";
    my $tmpname = "${fname}_tmp";

    open(my $out, '>', $outname) or die("Can't open file: $outname: $!\n");
    print {$out} $newfilestring or die("Can't write file: $outname: $!\n");
    # Buffered write errors only surface at close, so check it before the
    # original file is touched.
    close($out) or die("Can't close file: $outname: $!\n");

    # rename old file, rename new file, delete old file
    rename($fname, $tmpname)
        or die("Can't rename $fname to $tmpname: $!\n");
    unless (rename($outname, $fname)) {
        my $error = $!;
        rename($tmpname, $fname);    # best effort: put the original back
        die("Can't rename $outname to $fname: $error\n");
    }
    unlink($tmpname) or warn("Can't remove $tmpname: $!\n");

    return;
}
# cleansemistring($text)
#
# Returns a copy of $text (C++ source) in which the ";" that follows the
# closing brace of a namespace block - optionally separated from it by
# whitespace - has been removed:
#
#     namespace XXX { ... };     becomes     namespace XXX { ... }
#
# Nested and anonymous namespaces are handled.  The ";" after any other
# block (class, struct, enum, ...) is left alone.  "using namespace X;"
# and "namespace A = B;" are not block openings: they reach a ";" before
# any "{" and so never match the namespace pattern.  Text inside // and
# /* */ comments is skipped, including a comment that runs to the end of
# the input without a terminator.
#
# Limitations: string and character literals are not recognised, so a
# brace or "//" inside a literal is treated as code.
sub cleansemistring{
    my $s = $_[0];
    return $s if !defined $s;

    # One token per match; exactly one capture group is defined:
    #   group 1 - a comment
    #   group 2 - "namespace", optional name, and its opening brace
    #   group 3 - any other opening brace
    #   group 4 - a closing brace
    my $token = qr!
          ( // [^\n]* | /\* .*? (?: \*/ | \z ) )
        | ( \b namespace \b [^;{}]* \{ )
        | ( \{ )
        | ( \} )
    !xs;

    # One entry per currently open "{": true when a namespace opened it.
    # Pairing each "}" with the most recent "{" keeps the bookkeeping
    # local, so stray braces earlier in the file do not shift later pairs.
    my @opened_by_namespace;

    my $result = '';
    my $copied = 0;    # everything in $s before this offset is in $result

    while ($s =~ /$token/g) {
        next if defined $1;
        if (defined $2) { push @opened_by_namespace, 1; next; }
        if (defined $3) { push @opened_by_namespace, 0; next; }

        # A closing brace: only the end of a namespace is of interest.
        # (pop yields undef for an unmatched "}", which is ignored.)
        next unless pop @opened_by_namespace;

        # /c keeps the scan position when no ";" follows, so the outer
        # loop simply carries on right after the brace.
        if ($s =~ /\G(\s*);/gc) {
            # $+[1] is the offset of the ";", $+[0] the offset after it:
            # copy everything up to the ";" and resume behind it.
            $result .= substr($s, $copied, $+[1] - $copied);
            $copied = $+[0];
        }
    }

    $result .= substr($s, $copied);
    return $result;
}
#===============================
sub PrintUsage
{
    # Prints the usage text to standard output and terminates the script
    # with exit status 1.
    my @usage_lines = (
        '        remove the finishing ; from namespace{...};',
        '',
        '        Usage:',
        '',
        '        perl cleansemicolon.pl DIR',
        '',
    );
    print map { "$_\n" } @usage_lines;
    exit 1;
}
# Script entry point: expects the directory to clean as the first
# command-line argument and shows the usage text (which exits) otherwise.
if (@ARGV < 1) {
    PrintUsage();
}

# The file-name pattern is anchored at the end of the name.  The previous
# unanchored '.*\.cpp|.*\.h|.*\.cc' also matched names such as foo.html,
# foo.h.bak or a leftover foo.cpp_out.  The .hh/.hpp/.hxx header
# extensions are listed explicitly because the old pattern matched them
# through its ".h" alternative.
ParseTree($ARGV[0], '\.(?:cpp|cc|h|hh|hpp|hxx)\z');
1;

======================================================