Attachment 'update-AL20.pl'

Download

   1 #!/usr/bin/perl -w
   2 
   3 # Licensed to the Apache Software Foundation (ASF) under one or more
   4 # contributor license agreements.  See the NOTICE file distributed with
   5 # this work for additional information regarding copyright ownership.
   6 # The ASF licenses this file to You under the Apache License, Version 2.0
   7 # (the "License"); you may not use this file except in compliance with
   8 # the License.  You may obtain a copy of the License at
   9 #
  10 #     http://www.apache.org/licenses/LICENSE-2.0
  11 #
  12 # Unless required by applicable law or agreed to in writing, software
  13 # distributed under the License is distributed on an "AS IS" BASIS,
  14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 # See the License for the specific language governing permissions and
  16 # limitations under the License.
  17 #
  18 #=========================================================
  19 #
  20 # Purpose:
  21 # Update the license header for files that have an existing
  22 # Apache License 2.0 which used the old style copyright line.
  23 #
  24 # Investigates every text file and detects various different versions
  25 # of the ASF license header.
  26 #
  27 # Developed only for UNIX, YMMV.
  28 #
  29 # Procedure:
  30 # Run the script. It will descend the directory tree.
  31 # Run with no parameters or -h to show usage.
  32 # Use practice mode (-p) to see what existing license headers are detected.
  33 # Use verbose practice mode (-v -p) to scan for WARN and ERROR.
  34 # Add patterns to an avoidList to skip certain files or directories.
  35 #
  36 # Note:
  37 # You might prefer the tools/copy2license.pl which operates on a specific set of
  38 # files. Whereas update-AL20.pl investigates all text files found, so useful
  39 # for xml-related projects which tend to have many different filename extensions.
  40 # Note:
  41 # Attend to the warning in tools/copy2license.pl about "collective copyright".
  42 #=========================================================
  43 
  44 use strict;
  45 use vars qw($opt_p $opt_h $opt_v);
  46 use Getopt::Std;
  47 use File::Basename;
  48 use File::Find;
  49 
  # Running totals, incremented while files are processed and printed in the
  # summary at the end of the run.
  my ($countTotal, $countLicenseMissing, $countAvoid) = (0, 0, 0);
  my ($countLicense20, $countLicense20a, $countLicense20b) = (0, 0, 0);
  my $countLicenseF20 = 0;
  my $countLicense20x = 0;

  #--------------------------------------------------
  # ensure proper usage
  getopts("phv");
  if (defined($opt_h)) {
    # Help was requested explicitly, so this is not an error.
    ShowUsage();
    exit 0;
  }
  if (scalar @ARGV < 1) {
    # The mandatory startDir argument is missing: report failure to the caller.
    ShowUsage();
    exit 1;
  }

  my $startDir = shift;
  if (!-e $startDir) {
    print "\nThe start directory '$startDir' does not exist.\n";
    ShowUsage();
    exit 1;
  }
  my $avoidList = shift;
  if (defined($avoidList) && !-e $avoidList) {
    print qq!
The list of files to avoid '$avoidList' does not exist.
!;
    ShowUsage();
    exit 1;
  }
  my @avoidList;
  # read the avoidList: one pattern per line, lines starting with '#' are
  # comments
  if (defined($avoidList)) {
    open(my $avoidFh, '<', $avoidList)
      or die "Could not open input file '$avoidList': $!";
    while (my $pattern = <$avoidFh>) {
      next if ($pattern =~ /^#/);
      chomp $pattern;
      # Skip blank lines. An empty string used as a regex does not mean
      # "match nothing": Perl re-uses the last successfully matched pattern,
      # which would make the avoid decision unpredictable.
      next if ($pattern =~ /^\s*$/);
      push(@avoidList, $pattern);
    }
    close $avoidFh;
  }
  88 
  89 #--------------------------------------------------
  90 # Do the work
  91 
  # Everything below works relative to the start directory.
  chdir($startDir) or die "Cannot cd to '$startDir': $!\n";

  print "\nDoing practice run. No files will be written\n" if $opt_p;

  # Legend explaining the license-type labels used in the output.
  print <<'LEGEND';

AL-20 = Apache License 2.0 with original Copyright line.
AL-20a = Apache License 2.0 with original Copyright line and "or its licensors".
AL-20b = Apache License 2.0 with no Copyright line, i.e. the current style.
AL-20x = same as AL-20b but missing the word "contributor".
----------------------

LEGEND

  # Replacement header text. Every line after the first starts with the
  # literal marker LEADER, which is later swapped for the comment leader
  # detected in the file being updated. Lines are joined with newlines and
  # the result has no trailing newline.
  my @newHeaderLines = (
    'Licensed to the Apache Software Foundation (ASF) under one or more',
    'LEADERcontributor license agreements.  See the NOTICE file distributed with',
    'LEADERthis work for additional information regarding copyright ownership.',
    'LEADERThe ASF licenses this file to You under the Apache License, Version 2.0',
    'LEADER(the "License"); you may not use this file except in compliance with',
    'LEADERthe License.  You may obtain a copy of the License at',
  );
  my $newLicenseHeader = join("\n", @newHeaderLines);
 113 
 114 #--------------------------------------------------
 115 sub process_file {
 116   return unless -f && -T; # process only text files
 117   my $fileName = $File::Find::name;
 118   my ($file, $dir, $ext) = fileparse($fileName, qr/\.[^.]*/);
 119   return if ($dir =~ /\/\.svn\//); # skip SVN directories
 120   return if ($dir =~ /\/CVS\//); # skip CVS directories
 121   return if ($dir =~ /\/build\//); # skip build directories
 122   return if ($file =~ /^\./); # skip hidden files
 123   $fileName =~ s/^\.\///; # strip leading ./
 124   my $pathName = $startDir . "/" . $fileName;
 125   foreach my $avoidFn (@avoidList) {
 126     if ($pathName =~ /$avoidFn/) {
 127       $countAvoid++;
 128       return;
 129     }
 130   }
 131   $countTotal++;
 132   if ($opt_v) { print "$fileName : "; }
 133   my $licenseType = "";
 134   my $updatedLicenseHeader = "";
 135   my $commentLeader = "";
 136   open(INPUT, "<$pathName") or die "Could not open input file '$pathName': $!";
 137   undef $/;  # slurp the whole file
 138   my $content = <INPUT>;
 139   $/ = "\n"; # reset input record separator
 140   # we want our matches to happen only in the top part of the file
 141   my $headContent = substr($content, 0, 1200);
 142   my $tailContent = "";
 143   if (length($content) > 1200) {
 144     $tailContent = substr($content, 1200);
 145   }
 146   # detect various existing licenses and the comment leader
 147   LICENSE_CASE: {
 148     if ($headContent =~ /Apache Software Foundation \(ASF\) under one/) {
 149       if ($headContent !~ /contributor license agreements\.[ ]+See the NOTICE/) {
 150         # detect an error with a previous version of this script
 151         $countLicense20x++; $licenseType = "AL-20x";
 152       }
 153       else {
 154         $countLicense20b++; $licenseType = "AL-20b";
 155       }
 156       last LICENSE_CASE;
 157     }
 158     if ($headContent =~ /Licensed *under *the *Apache *License.*Version 2.0/) {
 159       if ($headContent =~ /Apache Software Foundation or its licensors/) {
 160         $countLicense20a++; $licenseType = "AL-20a";
 161       }
 162       else {
 163         if ($headContent =~ /Copyright.*Apache Software Foundation/) {
 164           $countLicense20++; $licenseType = "AL-20";
 165         }
 166         else {
 167           $countLicenseF20++; $licenseType = "F-AL-20";
 168         }
 169       }
 170       if ($headContent =~ /(.*)Copyright ([0-9-, ]+) The Apache Software Foundation/) {
 171         $commentLeader = $1;
 172       }
 173       last LICENSE_CASE;
 174     }
 175     # catchall
 176     $countLicenseMissing++;
 177     if ($opt_v) { print "WARN: not Apache License"; }
 178   }
 179   if ($opt_v && $licenseType) { print "found $licenseType"; }
 180 
 181   if (!$licenseType || $licenseType eq "AL-20b" || $licenseType =~/^F-/) {
 182     if ($opt_v) { print ", skipping\n"; }
 183     close INPUT;
 184     return;
 185   }
 186   close INPUT;
 187   $updatedLicenseHeader = $newLicenseHeader;
 188   if ($commentLeader eq '/* ')
 189   {
 190     $updatedLicenseHeader = "\n * ".$updatedLicenseHeader;
 191     $commentLeader = ' * ';
 192   }
 193   $updatedLicenseHeader =~ s/^LEADER/$commentLeader/gosm;
 194   my $headerUpdated = 0;
 195   if ($opt_v) { print "\n"; }
 196   UPDATE_CASE: {
 197     if ($headContent =~ s#Copyright ([0-9-, ]+) The Apache Software Foundation.*You may obtain a copy of the License at *$#$updatedLicenseHeader#osm) {
 198       $headerUpdated = 1;
 199       last UPDATE_CASE;
 200     }
 201     if ($licenseType eq "AL-20x") {
 202       if ($headContent =~ s#(license agreements\.[ ]+See the NOTICE)#contributor $1#) {
 203         $headerUpdated = 1;
 204         last UPDATE_CASE;
 205       }
 206     }
 207     print "ERROR: Could not replace license header pattern\n";
 208   }
 209   if (!$opt_p && $headerUpdated) {
 210     open(OUTPUT, ">$pathName") or die ("Could not open output file '$pathName': $!\n");
 211     print OUTPUT $headContent . $tailContent;
 212     close OUTPUT;
 213   }
 214 }
 215 find(\&process_file, ".");
 216 
 217 #--------------------------------------------------
 218 # report some statistics
 219 print qq!
 220 ----------------------
 221 Processed $countTotal text files:
 222 $countLicense20b files already had AL-20b and were skipped.
 223 $countLicense20x files had AL-20x and were updated.
 224 $countLicense20a files had AL-20a and were updated.
 225 $countLicense20 files had the original AL-20 and were updated.
 226 $countLicenseF20 files were 3rdParty users of AL-20 and were skipped.
 227 $countLicenseMissing files were not Apache License 2.0 (or had no license) and were skipped.
 228 !;
 229 if (defined($avoidList)) {
 230   print "Avoided $countAvoid files as specified in the avoidList\n";
 231 }
 232 print "\n";
 233 
 234 if ($opt_p) { print "Finished practice run.\n"; }
 235 
 236 #==================================================
 237 # ShowUsage
 238 #==================================================
 239 
 240 sub ShowUsage {
 241   print qq!
 242 Usage: $0 [-h] [-p] [-v] startDir [avoidList] > logfile
 243                                                                                 
 244   where:
 245   startDir = The directory (pathname) to start processing. Will descend.
 246   avoidList = List of files or directories to avoid, one per line.
 247 
 248   option:
 249   h = Show this help message.
 250   v = Be verbose.
 251   p = Do a practice run. Do not write any files.
 252 
 253 Note: It will skip directories with name /build/
 254 Add other patterns to the avoidList.
 255 
 256 !;
 257 }

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

You are not allowed to attach a file to this page.