Attachment 'update-AL20.pl'

Download

   1 #!/usr/bin/perl -w
   2 
   3 # Licensed to the Apache Software Foundation (ASF) under one or more
   4 # contributor license agreements.  See the NOTICE file distributed with
   5 # this work for additional information regarding copyright ownership.
   6 # The ASF licenses this file to you under the Apache License, Version 2.0
   7 # (the "License"); you may not use this file except in compliance with
   8 # the License.  You may obtain a copy of the License at
   9 #
  10 #     http://www.apache.org/licenses/LICENSE-2.0
  11 #
  12 # Unless required by applicable law or agreed to in writing, software
  13 # distributed under the License is distributed on an "AS IS" BASIS,
  14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 # See the License for the specific language governing permissions and
  16 # limitations under the License.
  17 #
  18 #=========================================================
  19 #
  20 # Purpose:
  21 # Update the license header for files that have an existing
  22 # Apache License 2.0 which used the old style copyright line.
  23 #
  24 # Investigates every text file and detects various different versions
  25 # of the ASF license header.
  26 #
  27 # Developed only for UNIX, YMMV.
  28 #
  29 # Procedure:
  30 # Run the script. It will descend the directory tree.
  31 # Run with no parameters or -h to show usage.
  32 # Use practice mode (-p) to see what existing license headers are detected.
   33 # Use verbose practice mode (-v -p) to scan for WARN and ERROR.
  34 # Add patterns to an avoidList to skip certain files or directories.
  35 #
  36 # Note:
  37 # You might prefer the tools/copy2license.pl which operates on a specific set of
  38 # files. Whereas update-AL20.pl investigates all text files found, so useful
   39 # for xml-related projects which tend to have many different filename extensions.
  40 # Note:
  41 # Attend to the warning in tools/copy2license.pl about "collective copyright".
  42 #=========================================================
  43 
  44 use strict;
  45 use vars qw($opt_p $opt_h $opt_v);
  46 use Getopt::Std;
  47 use File::Basename;
  48 use File::Find;
  49 
  50 my ($countTotal, $countLicenseMissing, $countAvoid) = (0, 0, 0);
  51 my ($countLicense20, $countLicense20a, $countLicense20b) = (0, 0, 0);
  52 my $countLicense20x = 0;
  53 
  54 #--------------------------------------------------
  55 # ensure proper usage
  56 getopts("phv");
  57 if ((scalar @ARGV < 1) || defined($opt_h)) {
  58   ShowUsage();
  59   exit;
  60 }
  61 
  62 my $startDir = shift;
  63 if (!-e $startDir) {
  64   print "\nThe start directory '$startDir' does not exist.\n";
  65   ShowUsage();
  66   exit;
  67 }
  68 my $avoidList = shift;
  69 if (defined($avoidList) && !-e $avoidList) {
  70   print qq!
  71 The list of files to avoid '$avoidList' does not exist.
  72 !;
  73   ShowUsage();
  74   exit;
  75 }
  76 my @avoidList;
  77 # read the avoidList
  78 if (defined($avoidList)) {
  79   open(INPUT, "<$avoidList") or die "Could not open input file '$avoidList': $!";
  80   while (<INPUT>) {
  81     next if (/^#/);
  82     chomp;
  83     push(@avoidList, $_);
  84   }
  85   close INPUT;
  86 }
  87 
  88 #--------------------------------------------------
  89 # Do the work
  90 
  91 chdir "$startDir" or die "Cannot cd to '$startDir': $!\n";
  92 
  93 if ($opt_p) { print "\nDoing practice run. No files will be written\n"; }
  94 print qq!
  95 AL-20 = Apache License 2.0 with original Copyright line.
  96 AL-20a = Apache License 2.0 with original Copyright line and "or its licensors".
  97 AL-20b = Apache License 2.0 with no Copyright line, i.e. the current style.
  98 AL-20x = same as AL-20b but missing the word "contributor".
  99 ----------------------
 100 
 101 !;
 102 
 103 my $newLicenseHeader = <<"EOT";
 104 Licensed to the Apache Software Foundation (ASF) under one or more
 105 LEADERcontributor license agreements.  See the NOTICE file distributed with
 106 LEADERthis work for additional information regarding copyright ownership.
 107 LEADERThe ASF licenses this file to you under the Apache License, Version 2.0
 108 LEADER(the "License"); you may not use this file except in compliance with
 109 LEADERthe License.  You may obtain a copy of the License at
 110 EOT
 111 chomp($newLicenseHeader);
 112 
 113 #--------------------------------------------------
 114 sub process_file {
 115   return unless -f && -T; # process only text files
 116   my $fileName = $File::Find::name;
 117   my ($file, $dir, $ext) = fileparse($fileName, qr/\.[^.]*/);
 118   return if ($dir =~ /\/\.svn\//); # skip SVN directories
 119   return if ($dir =~ /\/CVS\//); # skip CVS directories
 120   return if ($dir =~ /\/build\//); # skip build directories
 121   return if ($file =~ /^\./); # skip hidden files
 122   $fileName =~ s/^\.\///; # strip leading ./
 123   my $pathName = $startDir . "/" . $fileName;
 124   foreach my $avoidFn (@avoidList) {
 125     if ($pathName =~ /$avoidFn/) {
 126       $countAvoid++;
 127       return;
 128     }
 129   }
 130   $countTotal++;
 131   if ($opt_v) { print "$fileName : "; }
 132   my $licenseType = "";
 133   my $updatedLicenseHeader = "";
 134   my $commentLeader = "";
 135   open(INPUT, "<$pathName") or die "Could not open input file '$pathName': $!";
 136   undef $/;  # slurp the whole file
 137   my $content = <INPUT>;
 138   $/ = "\n"; # reset input record separator
 139   # we want our matches to happen only in the top part of the file
 140   my $headContent = substr($content, 0, 1200);
 141   my $tailContent = "";
 142   if (length($content) > 1200) {
 143     $tailContent = substr($content, 1200);
 144   }
 145   # detect various existing licenses and the comment leader
 146   LICENSE_CASE: {
 147     if ($headContent =~ /Apache Software Foundation \(ASF\) under one/) {
 148       if ($headContent !~ /contributor license agreements\.[ ]+See the NOTICE/) {
 149         # detect an error with a previous version of this script
 150         $countLicense20x++; $licenseType = "AL-20x";
 151       }
 152       else {
 153         $countLicense20b++; $licenseType = "AL-20b";
 154       }
 155       last LICENSE_CASE;
 156     }
 157     if ($headContent =~ /Licensed under the Apache License.*Version 2.0/) {
 158       if ($headContent =~ /Apache Software Foundation or its licensors/) {
 159         $countLicense20a++; $licenseType = "AL-20a";
 160       }
 161       else {
 162         $countLicense20++; $licenseType = "AL-20";
 163       }
 164       if ($headContent =~ /(.*)Copyright ([0-9-, ]+) The Apache Software Foundation/) {
 165         $commentLeader = $1;
 166       }
 167       last LICENSE_CASE;
 168     }
 169     # catchall
 170     $countLicenseMissing++;
 171     if ($opt_v) { print "WARN: not Apache License"; }
 172   }
 173   if ($opt_v && $licenseType) { print "found $licenseType"; }
 174 
 175   if (!$licenseType || $licenseType eq "AL-20b") {
 176     if ($opt_v) { print ", skipping\n"; }
 177     close INPUT;
 178     return;
 179   }
 180   close INPUT;
 181   $updatedLicenseHeader = $newLicenseHeader;
 182   $updatedLicenseHeader =~ s/^LEADER/$commentLeader/gosm;
 183   my $headerUpdated = 0;
 184   if ($opt_v) { print "\n"; }
 185   UPDATE_CASE: {
 186     if ($headContent =~ s#Copyright ([0-9-, ]+) The Apache Software Foundation.*You may obtain a copy of the License at$#$updatedLicenseHeader#osm) {
 187       $headerUpdated = 1;
 188       last UPDATE_CASE;
 189     }
 190     if ($licenseType eq "AL-20x") {
 191       if ($headContent =~ s#(license agreements\.[ ]+See the NOTICE)#contributor $1#) {
 192         $headerUpdated = 1;
 193         last UPDATE_CASE;
 194       }
 195     }
 196     print "ERROR: Could not replace license header pattern\n";
 197   }
 198   if (!$opt_p && $headerUpdated) {
 199     open(OUTPUT, ">$pathName") or die ("Could not open output file '$pathName': $!\n");
 200     print OUTPUT $headContent . $tailContent;
 201     close OUTPUT;
 202   }
 203 }
 204 find(\&process_file, ".");
 205 
 206 #--------------------------------------------------
 207 # report some statistics
 208 print qq!
 209 ----------------------
 210 Processed $countTotal text files:
 211 $countLicense20b files already had AL-20b and were skipped.
 212 $countLicense20x files had AL-20x and were updated.
 213 $countLicense20a files had AL-20a and were updated.
 214 $countLicense20 files had the original AL-20 and were updated.
 215 $countLicenseMissing files were not Apache License 2.0 (or had no license) and were skipped.
 216 !;
 217 if (defined($avoidList)) {
 218   print "Avoided $countAvoid files as specified in the avoidList\n";
 219 }
 220 print "\n";
 221 
 222 if ($opt_p) { print "Finished practice run.\n"; }
 223 
 224 #==================================================
 225 # ShowUsage
 226 #==================================================
 227 
 228 sub ShowUsage {
 229   print qq!
 230 Usage: $0 [-h] [-p] [-v] startDir [avoidList] > logfile
 231                                                                                 
 232   where:
 233   startDir = The directory (pathname) to start processing. Will descend.
 234   avoidList = List of files or directories to avoid, one per line.
 235 
 236   option:
 237   h = Show this help message.
 238   v = Be verbose.
 239   p = Do a practice run. Do not write any files.
 240 
 241 Note: It will skip directories with name /build/
 242 Add other patterns to the avoidList.
 243 
 244 !;
 245 }

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

You are not allowed to attach a file to this page.