Attachment 'insert_license.pl'

Download

   1 #!/usr/bin/perl -w
   2 
   3 # Licensed to the Apache Software Foundation (ASF) under one or more
   4 # contributor license agreements.  See the NOTICE file distributed with
   5 # this work for additional information regarding copyright ownership.
   6 # The ASF licenses this file to You under the Apache License, Version 2.0
   7 # (the "License"); you may not use this file except in compliance with
   8 # the License.  You may obtain a copy of the License at
   9 #
  10 #     http://www.apache.org/licenses/LICENSE-2.0
  11 #
  12 # Unless required by applicable law or agreed to in writing, software
  13 # distributed under the License is distributed on an "AS IS" BASIS,
  14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 # See the License for the specific language governing permissions and
  16 # limitations under the License.
  17 #
  18 #=========================================================
  19 #
  20 # For files that do not yet have an Apache License, insert the 2.0 license.
  21 # Adds comment markers for the relevant file type.
  22 #
  23 # This can also be used to provide a summary of the current situation.
  24 # It will detect the presence of various different license headers.
  25 # Use the -p option for practice mode.
  26 #
  27 # Limitations:
  28 # - Only developed and tested for certain file types. Others will be
  29 # reported and skipped.
  30 # Needs tweaks for other types (see "configuration" section below).
  31 # - Only inserts missing licenses and detects and reports other license types.
  32 #  See ./update-AL20.pl to update to the current license style.
  33 #
  34 # Caveats:
  35 # - As usual, make a backup of your tree first or be prepared to 'svn revert -R'
  36 # your working copy if the script stuffs up.
  37 #
  38 # WARNING: Be sure to look at the output of this script for warnings.
  39 # WARNING: Be sure to do the normal 'svn diff' and review.
  40 # Attend to the warning in tools/copy2license.pl about "collective copyright".
  41 #
  42 # Developed only for UNIX, YMMV.
  43 #
  44 # Procedure:
  45 # See ./relicense.txt for an example procedure.
  46 # Use -p for practice mode.
  47 # Run the script. It will descend the directory tree.
  48 # Run with no parameters or -h to show usage.
  49 #
  50 #=========================================================
  51 
  52 use strict;
  53 use vars qw($opt_h $opt_p);
  54 use Getopt::Std;
  55 use File::Basename;
  56 use File::Find;
  57 
  58 #--------------------------------------------------
  59 # ensure proper usage
  60 getopts("hp");
  61 if ((scalar @ARGV < 1) || defined($opt_h)) {
  62   ShowUsage();
  63   exit;
  64 }
  65 
  66 my $startDir = shift;
  67 my $avoidList = shift;
  68 if (!-e $startDir) {
  69   print STDERR qq!
  70 The start directory '$startDir' does not exist.
  71 !;
  72   ShowUsage();
  73   exit;
  74 }
  75 if (defined($avoidList) && !-e $avoidList) {
  76   print STDERR qq!
  77 The list of files to avoid '$avoidList' does not exist.
  78 !;
  79   ShowUsage();
  80   exit;
  81 }
  82 if ($opt_p) { print STDERR "\nDoing practice run. No files will be written\n"; }
  83 print qq!
  84 AL-20 = Apache License 2.0 with original Copyright line.
  85 AL-20a = Apache License 2.0 with original Copyright line and "or its licensors".
  86 AL-20b = Apache License 2.0 with no Copyright line, i.e. the current style.
  87 ----------------------
  88 
  89 !;
  90 
  91 #--------------------------------------------------
  92 # do some configuration
  93 my $license = qq!Licensed to the Apache Software Foundation (ASF) under one or more
  94 contributor license agreements.  See the NOTICE file distributed with
  95 this work for additional information regarding copyright ownership.
  96 The ASF licenses this file to you under the Apache License, Version 2.0
  97 (the "License"); you may not use this file except in compliance with
  98 the License.  You may obtain a copy of the License at
  99 
 100     http://www.apache.org/licenses/LICENSE-2.0
 101 
 102 Unless required by applicable law or agreed to in writing, software
 103 distributed under the License is distributed on an "AS IS" BASIS,
 104 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 105 See the License for the specific language governing permissions and
 106 limitations under the License.
 107 !;
 108 my @license = split(/\n/, $license);
 109 
 110 # build a hash of filename extensions to be processed
 111 # together with the particular style of comment marker to use.
 112 my @xmlFileTypes = (
 113   ".xml", ".xsl", ".xslt", ".xmap", ".xcat",
 114   ".xmap", ".xconf", ".xroles", ".roles", ".xsp", ".rss",
 115   ".xlog", ".xsamples", ".xtest", ".xweb", ".xwelcome",
 116   ".samplesxconf", ".samplesxpipe", ".svg", ".xhtml", ".jdo", ".gt", ".jx", ".jmx",
 117   ".jxt", ".meta", ".pagesheet", ".stx", ".xegrm", ".xgrm", ".xlex", ".xmi",
 118   ".xsd", ".rng", ".rdf", ".rdfs", ".xul", ".tld", ".xxe", ".ft", ".fv",
 119   ".wsdd", ".wsdl", ".xlog",
 120 );
 121 my @sgmlFileTypes = (
 122   ".dtd", ".mod", ".sgml", ".sgm",
 123 );
 124 my @htmlFileTypes = (
 125   ".html", ".htm", ".jsp", ".ihtml",
 126 );
 127 my @cFileTypes = (
 128   ".java", ".js", ".c", ".h", ".cpp", ".cc", ".cs", ".css", ".egrm", ".grm",
 129   ".javascript", ".jj", ".gy",
 130 );
 131 my @shFileTypes = (
 132   ".sh", ".ccf", ".pl", ".py", ".sed", ".awk", ".ksh"
 133 );
 134 my @propertiesFileTypes = (
 135   ".rnc", ".rnx", ".properties"
 136 );
 137 my @dosFileTypes = (
 138   ".bat", ".cmd",
 139 );
 140 my @sqlFileTypes = (
 141   ".script",
 142 );
 143 my @textFileTypes = (
 144   ".txt",
 145 );
 146 my @ignoreFileTypes = (
 147   ".dcl", ".ent", ".pen", ".ant", ".tmpl", "NoExtension", ".sql"
 148 );
 149 my (%fileTypes, $fileType);
 150 foreach $fileType (@xmlFileTypes) {
 151   $fileTypes{$fileType}{type} = "xml";
 152   $fileTypes{$fileType}{openComment} = "<!--\n";
 153   $fileTypes{$fileType}{leaderComment} = "  ";
 154   $fileTypes{$fileType}{closeComment} = "-->\n";
 155   # insert after line 1 which must be the xml declaration
 156   $fileTypes{$fileType}{insertionPoint} = "1";
 157 }
 158 foreach $fileType (@sgmlFileTypes) {
 159   $fileTypes{$fileType}{type} = "sgml";
 160   $fileTypes{$fileType}{openComment} = "<!--\n";
 161   $fileTypes{$fileType}{leaderComment} = "  ";
 162   $fileTypes{$fileType}{closeComment} = "-->\n";
 163   # insert at very top of file
 164   $fileTypes{$fileType}{insertionPoint} = "0";
 165 }
 166 foreach $fileType (@htmlFileTypes) {
 167   $fileTypes{$fileType}{type} = "html";
 168   $fileTypes{$fileType}{openComment} = "<!--\n";
 169   $fileTypes{$fileType}{leaderComment} = "  ";
 170   $fileTypes{$fileType}{closeComment} = "-->\n";
 171   # insert at very top of file
 172   $fileTypes{$fileType}{insertionPoint} = "0";
 173 }
 174 foreach $fileType (@cFileTypes) {
 175   $fileTypes{$fileType}{type} = "C";
 176   $fileTypes{$fileType}{openComment} = "/*\n";
 177   $fileTypes{$fileType}{leaderComment} = "* ";
 178   $fileTypes{$fileType}{closeComment} = "*/\n";
 179   # insert at very top of file
 180   $fileTypes{$fileType}{insertionPoint} = "0";
 181 }
 182 foreach $fileType (@shFileTypes) {
 183   $fileTypes{$fileType}{type} = "sh";
 184   $fileTypes{$fileType}{openComment} = "\n";
 185   $fileTypes{$fileType}{leaderComment} = "# ";
 186   $fileTypes{$fileType}{closeComment} = "\n";
 187   # insert after line 1 which must be #! script invocation
 188   $fileTypes{$fileType}{insertionPoint} = "1";
 189 }
 190 foreach $fileType (@propertiesFileTypes) {
 191   $fileTypes{$fileType}{type} = "properties";
 192   $fileTypes{$fileType}{openComment} = "";
 193   $fileTypes{$fileType}{leaderComment} = "# ";
 194   $fileTypes{$fileType}{closeComment} = "\n";
 195   # insert at very top of file
 196   $fileTypes{$fileType}{insertionPoint} = "0";
 197 }
 198 foreach $fileType (@dosFileTypes) {
 199   $fileTypes{$fileType}{type} = "dos";
 200   $fileTypes{$fileType}{openComment} = "\n";
 201   $fileTypes{$fileType}{leaderComment} = "\@REM ";
 202   $fileTypes{$fileType}{closeComment} = "\n";
 203   # insert at very top of file
 204   $fileTypes{$fileType}{insertionPoint} = "0";
 205 }
 206 foreach $fileType (@sqlFileTypes) {
 207   $fileTypes{$fileType}{type} = "sql";
 208   $fileTypes{$fileType}{openComment} = "";
 209   $fileTypes{$fileType}{leaderComment} = "-- ";
 210   $fileTypes{$fileType}{closeComment} = "\n";
 211   # insert at very top of file
 212   $fileTypes{$fileType}{insertionPoint} = "0";
 213 }
 214 foreach $fileType (@textFileTypes) {
 215   $fileTypes{$fileType}{type} = "text";
 216   $fileTypes{$fileType}{openComment} = "";
 217   $fileTypes{$fileType}{leaderComment} = "";
 218   $fileTypes{$fileType}{closeComment} = "\n";
 219   # insert at very top of file
 220   $fileTypes{$fileType}{insertionPoint} = "0";
 221 }
 222 
 223 my ($countTotal, $countUnknownType, $countIgnoreType) = (0, 0, 0);
 224 my ($countXmlDeclMissing, $countInserted, $countAvoid) = (0, 0, 0);
 225 my ($countLicense, $countLicense10, $countLicense11, $countLicense12) = (0, 0, 0, 0);
 226 my ($countLicensePD, $countLicenseOther) = (0, 0);
 227 my ($countLicense20, $countLicense20a, $countLicense20b) = (0, 0, 0);
 228 my $dualLicensesDetected = 0;
 229 my %uniqueSuffixes;
 230 my @avoidList;
 231 
 232 # read the avoidList
 233 if (defined($avoidList)) {
 234   open(INPUT, "<$avoidList") or die "Could not open input file '$avoidList': $!";
 235   while (<INPUT>) {
 236     next if (/^#/);
 237     chomp;
 238     push(@avoidList, $_);
 239   }
 240   close INPUT;
 241 }
 242 
 243 #--------------------------------------------------
 244 sub process_file {
 245   return unless -f && -T; # process only text files
 246   my $fileName = $File::Find::name;
 247   my ($file, $dir, $ext) = fileparse($fileName, qr/\.[^.]*/);
 248   return if ($dir =~ /\/CVS\//); # skip CVS directories
 249   return if ($dir =~ /\/\.svn\//); # skip SVN directories
 250   return if ($fileName =~ /.cvsignore/); # skip 
 251   return if ($file =~ /^\./); # skip hidden files
 252   foreach my $avoidFn (@avoidList) {
 253     if ($fileName =~ /$avoidFn/) {
 254       $countAvoid++;
 255       return;
 256     }
 257   }
 258   $countTotal++;
 259   if ($ext eq "") { $ext = "NoExtension"; }
 260   $uniqueSuffixes{$ext}++;
 261   print "$fileName, ";
 262   my $tmpFile = $fileName . ".tmp";
 263   open(INPUT, "<$fileName") or die "Could not open input file '$fileName': $!";
 264 
 265   # First do some tests on the file to ensure it does not already have a license
 266   # and ensure that XML files have an xml declaration.
 267   my ($existsLicense, $warnDualLicense, $existsXmlDecl) = (0, 0, 0);
 268   my ($warnAL20OldLicense) = 0;
 269   my ($warnAL20aOldLicense) = 0;
 270   my $licenseType = "";
 271   undef $/;  # slurp the whole file
 272   my $content = <INPUT>;
 273   # we want our matches to happen only in the top part of the file
 274 # NOTE: You may want to relax this from time-to-time to find
 275 # all possible dual-license issues.
 276   my $headContent = substr($content, 0, 1500);
 277 
 278   # detect various existing licenses
 279   LICENSE_CASE: {
 280     if ($headContent =~ /Licensed to the Apache Software Foundation \(ASF\) under/) {
 281       $existsLicense = 1; $countLicense++;
 282       $countLicense20b++; $licenseType = "AL-20b";
 283       last LICENSE_CASE;
 284     }
 285     if ($headContent =~ /Licensed under the Apache License.*Version 2.0/) {
 286       $existsLicense = 1; $countLicense++;
 287       if ($headContent =~ /Apache Software Foundation or its licensors/) {
 288         $countLicense20a++; $licenseType = "AL-20a";
 289         $warnAL20aOldLicense = 1;
 290       }
 291       else {
 292         $countLicense20++; $licenseType = "AL-20";
 293         $warnAL20OldLicense = 1;
 294       }
 295       last LICENSE_CASE;
 296     }
 297     if ($headContent =~ /The Apache Software License.*Version 1.2/) {
 298       $countLicense12++; $licenseType = "AL-12";
 299       $existsLicense = 1; $countLicense++;
 300       last LICENSE_CASE;
 301     }
 302     if ($headContent =~ /The Apache Software License.*Version 1.1/) {
 303       $countLicense11++; $licenseType = "AL-11";
 304       $existsLicense = 1; $countLicense++;
 305       last LICENSE_CASE;
 306     }
 307     if ($headContent =~ /Copyright.*The Apache Group/) {
 308       $countLicense10++; $licenseType = "AL-10";
 309       $existsLicense = 1; $countLicense++;
 310       last LICENSE_CASE;
 311     }
 312     if ($headContent =~ /Public Domain.*/i) {
 313       $countLicensePD++; $licenseType = "PublicDomain";
 314       $existsLicense = 1; $countLicense++;
 315       last LICENSE_CASE;
 316     }
 317     # catchall
 318     if ($headContent =~ /Copyright|\(c\)/i) {
 319       # do process xml files that have a copyright attribute
 320       last LICENSE_CASE if ($headContent =~ /copyright=/i);
 321       # do process DTD files that have a copyright attribute
 322       last LICENSE_CASE if ($headContent =~ /copyright CDATA/i);
 323       # do process css files that have a .copyright section
 324       last LICENSE_CASE if ($headContent =~ /\.copyright/i);
 325       # do process files that just talk about copyright
 326       last LICENSE_CASE if ($headContent =~ /copyright statement/i);
 327       $countLicenseOther++; $licenseType = "Other";
 328       $existsLicense = 1; $countLicense++;
 329       last LICENSE_CASE;
 330     }
 331     # catchall
 332     if ($headContent =~ /re[ -]*distribut/i) {
 333       $countLicenseOther++; $licenseType = "Other";
 334       $existsLicense = 1; $countLicense++;
 335       last LICENSE_CASE;
 336     }
 337   }
 338 
 339   # Try to detect if a new AL-20 license has been accidently inserted
 340   # as well as having some other license.
 341   # FIXME: If a practice run reveals more types of Foregin copyright
 342   # then add patterns here.
 343   if ($licenseType =~ /AL-20/) {
 344     if (($headContent =~ /Rights Reserved/i) ||
 345         ($headContent =~ /Public Domain/i) ||
 346         ($headContent =~ /Copyright.*Copyright/i)) {
 347       $warnDualLicense = 1; $dualLicensesDetected++;
 348     }
 349   }
 350 
 351   # ensure that xml files have an xml declaration
 352   if ($headContent =~ /^<\?xml/) { $existsXmlDecl = 1; }
 353 
 354   $/ = "\n"; # reset input record separator
 355 
 356   my $recognisedFileType = 0; my $thisFileType = "unknown";
 357   foreach $fileType (keys %fileTypes) {
 358     if ($fileType eq $ext) {
 359       $recognisedFileType = 1;
 360       $thisFileType = $fileTypes{$fileType}{type};
 361       last;
 362     }
 363   }
 364   print "extension=$ext, fileType=$thisFileType, ";
 365   if (!$existsXmlDecl && ($thisFileType eq "xml")) {
 366     print "XML file does not have XML Declaration so skipping\n";
 367     $countXmlDeclMissing++;
 368     return;
 369   }
 370   if ($existsLicense) {
 371     if ($licenseType !~ /^AL/) { print "WARN: "; }
 372     print "Found existing license (licenseType=$licenseType) so skipping";
 373     if ($warnAL20OldLicense) { print ", WARN: old AL-20 copyright notice"; }
 374     if ($warnAL20aOldLicense) { print ", WARN: old AL-20a copyright notice"; }
 375     if ($warnDualLicense) { print ", WARN: dual license"; }
 376     print "\n";
 377     return;
 378   }
 379   foreach $fileType (@ignoreFileTypes) {
 380     if ($fileType eq $ext) {
 381       $countIgnoreType++;
 382       print "ignored, ";
 383     }
 384   }
 385   if (!$recognisedFileType) {
 386     print "File type '$ext' is not recognised so skipping\n";
 387     $countUnknownType++;
 388     return;
 389   }
 390 
 391   # Now process the file.
 392   my $insertionDone = 0; my ($line, $thisLine);
 393   if (!$opt_p) {
 394     open(OUTPUT, ">$tmpFile")
 395       or die "Could not open output file '$tmpFile': $!";
 396   }
 397   $countInserted++;
 398   if ($fileTypes{$ext}{insertionPoint} == 0) {
 399     print "Insert new license\n";
 400     if (!$opt_p) {
 401       print OUTPUT $fileTypes{$ext}{openComment};
 402       foreach $line (@license) {
 403         $thisLine = $fileTypes{$ext}{leaderComment} . $line;
 404         $thisLine =~ s/\s+$//;
 405         print OUTPUT $thisLine, "\n";
 406       }
 407       print OUTPUT $fileTypes{$ext}{closeComment};
 408     }
 409     $insertionDone = 1;
 410   }
 411   seek(INPUT, 0, 0); $. = 0; # rewind to top of file
 412   while (<INPUT>) {
 413     if (!$opt_p) {
 414       print OUTPUT $_ or die "Could not write output file '$fileName': $!";
 415     }
 416     if (!$insertionDone) {
 417       if ($. == $fileTypes{$ext}{insertionPoint}) {
 418         print "Insert new license\n";
 419         if (!$opt_p) {
 420           print OUTPUT $fileTypes{$ext}{openComment};
 421           foreach $line (@license) {
 422             $thisLine = $fileTypes{$ext}{leaderComment} . $line;
 423             $thisLine =~ s/\s+$//;
 424             print OUTPUT $thisLine, "\n";
 425           }
 426           print OUTPUT $fileTypes{$ext}{closeComment};
 427         }
 428         $insertionDone = 1;
 429       }
 430     }
 431   }
 432   close INPUT or die "Could not close input file '$fileName': $!";
 433   if (!$opt_p) {
 434     close OUTPUT or die "Could not close output file '$tmpFile': $!";
 435     rename($tmpFile, $fileName);
 436   }
 437 }
 438 find(\&process_file, $startDir);
 439 
 440 #--------------------------------------------------
 441 # Report some statistics
 442 my $statsMsg = "were";
 443 if ($opt_p) { $statsMsg = "would be"; }
 444 $countUnknownType -= $countIgnoreType;
 445 print STDERR qq!
 446 Total $countTotal text files were investigated.
 447 New licenses $statsMsg inserted in $countInserted files.
 448 Skipped $countLicense files with an existing license:
 449  (Apache v2.0=$countLicense20, v2.0a=$countLicense20a, v2.0b=$countLicense20b)
 450  (Apache v1.2=$countLicense12, v1.1=$countLicense11, v1.0=$countLicense10)
 451  (Other=$countLicenseOther, PublicDomain=$countLicensePD)
 452 Skipped $countXmlDeclMissing XML files with missing XML Declaration.
 453 !;
 454 if (defined($avoidList)) {
 455   print STDERR "Avoided $countAvoid files as specified in the avoidList\n";
 456 }
 457 print STDERR qq!
 458 Ignored $countIgnoreType files of specified type (@ignoreFileTypes)
 459 Skipped $countUnknownType files of unknown type.
 460 !;
 461 if ($dualLicensesDetected) {
 462   print STDERR qq!
 463 WARNING: $dualLicensesDetected files had another license as well as the new
 464 Apache v2.0 license. (Scan the log output for lines with "WARN: dual".)
 465 !;
 466 }
 467 my $suffix;
 468 if ($countUnknownType > 0) {
 469   print STDERR qq!
 470 List of unknown filename extensions and ignored filename extensions:
 471 (Add new fileTypes to this script if you want them to be catered for.)
 472 !;
 473   foreach $suffix ( sort keys %uniqueSuffixes) {
 474     my $suffixKnown = 0;
 475     foreach $fileType (keys %fileTypes) {
 476       if ($suffix eq $fileType) { $suffixKnown = 1; }
 477     }
 478     if (!$suffixKnown) {
 479       print STDERR "$suffix=$uniqueSuffixes{$suffix} ";
 480     }
 481   }
 482   print STDERR "\n\n";
 483 }
 484 print STDERR "List of all unique filename extensions:\n";
 485 foreach $suffix ( sort keys %uniqueSuffixes) {
 486   print STDERR "$suffix=$uniqueSuffixes{$suffix} ";
 487 }
 488 print STDERR "\n\n";
 489 if ($opt_p) { print STDERR "Finished practice run.\n"; }
 490 
 491 #==================================================
 492 # ShowUsage
 493 #==================================================
 494 
 495 sub ShowUsage {
 496   print STDERR qq!
 497 Usage: $0 [-h] [-p] startDir [avoidList] > logfile
 498                                                                                 
 499   where:
 500   startDir = The SVN directory (pathname) to start processing. Will descend.
 501   avoidList = List of files and directories to avoid, one per line.
 502 
 503   option:
 504   h = Show this help message.
 505   p = Do a practice run. Do not write any files.
 506 
 507 !;
 508 }

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

You are not allowed to attach a file to this page.