Attachment 'insert_license.pl'

Download

   1 #!/usr/bin/perl -w
   2 
   3 # Licensed to the Apache Software Foundation (ASF) under one or more
   4 # contributor license agreements.  See the NOTICE file distributed with
   5 # this work for additional information regarding copyright ownership.
   6 # The ASF licenses this file to You under the Apache License, Version 2.0
   7 # (the "License"); you may not use this file except in compliance with
   8 # the License.  You may obtain a copy of the License at
   9 #
  10 #     http://www.apache.org/licenses/LICENSE-2.0
  11 #
  12 # Unless required by applicable law or agreed to in writing, software
  13 # distributed under the License is distributed on an "AS IS" BASIS,
  14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 # See the License for the specific language governing permissions and
  16 # limitations under the License.
  17 #
  18 #=========================================================
  19 #
  20 # For files that do not yet have an Apache License, insert the 2.0 license.
  21 # Adds comment markers for the relevant file type.
  22 #
  23 # This can also be used to provide a summary of the current situation.
  24 # It will detect the presence of various different license headers.
  25 # Use the -p option for practice mode.
  26 #
  27 # Limitations:
  28 # - Only developed and tested for certain file types. Others will be
  29 # reported and skipped.
  30 # Needs tweaks for other types (see "configuration" section below).
  31 # - Only inserts missing licenses and detects and reports other license types.
  32 #  See ./update-AL20.pl to update to the current license style.
  33 #
  34 # Caveats:
  35 # - As usual, make a backup of your tree first or be prepared to 'svn revert -R'
  36 # your working copy if the script stuffs up.
  37 #
  38 # WARNING: Be sure to look at the output of this script for warnings.
  39 # WARNING: Be sure to do the normal 'svn diff' and review.
  40 # Attend to the warning in tools/copy2license.pl about "collective copyright".
  41 #
  42 # Developed only for UNIX, YMMV.
  43 #
  44 # Procedure:
  45 # See ./relicense.txt for an example procedure.
  46 # Use -p for practice mode.
  47 # Run the script. It will descend the directory tree.
  48 # Run with no parameters or -h to show usage.
  49 #
  50 #=========================================================
  51 
  52 use strict;
  53 use vars qw($opt_h $opt_p);
  54 use Getopt::Std;
  55 use File::Basename;
  56 use File::Find;
  57 
  58 #--------------------------------------------------
  59 # ensure proper usage
  60 getopts("hp");
  61 if ((scalar @ARGV < 1) || defined($opt_h)) {
  62   ShowUsage();
  63   exit;
  64 }
  65 
  66 my $startDir = shift;
  67 my $avoidList = shift;
  68 if (!-e $startDir) {
  69   print STDERR qq!
  70 The start directory '$startDir' does not exist.
  71 !;
  72   ShowUsage();
  73   exit;
  74 }
  75 if (defined($avoidList) && !-e $avoidList) {
  76   print STDERR qq!
  77 The list of files to avoid '$avoidList' does not exist.
  78 !;
  79   ShowUsage();
  80   exit;
  81 }
  82 if ($opt_p) { print STDERR "\nDoing practice run. No files will be written\n"; }
  83 print qq!
  84 AL-20 = Apache License 2.0 with original Copyright line.
  85 AL-20a = Apache License 2.0 with original Copyright line and "or its licensors".
  86 AL-20b = Apache License 2.0 with no Copyright line, i.e. the current style.
  87 ----------------------
  88 
  89 !;
  90 
  91 #--------------------------------------------------
  92 # do some configuration
  93 my $license = qq!Licensed to the Apache Software Foundation (ASF) under one or more
  94 contributor license agreements.  See the NOTICE file distributed with
  95 this work for additional information regarding copyright ownership.
  96 The ASF licenses this file to You under the Apache License, Version 2.0
  97 (the "License"); you may not use this file except in compliance with
  98 the License.  You may obtain a copy of the License at
  99 
 100     http://www.apache.org/licenses/LICENSE-2.0
 101 
 102 Unless required by applicable law or agreed to in writing, software
 103 distributed under the License is distributed on an "AS IS" BASIS,
 104 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 105 See the License for the specific language governing permissions and
 106 limitations under the License.
 107 !;
 108 my @license = split(/\n/, $license);
 109 
 110 # build a hash of filename extensions to be processed
 111 # together with the particular style of comment marker to use.
 112 my @xmlFileTypes = (
 113   ".xml", ".xsl", ".xslt", ".xmap", ".xcat",
 114   ".xmap", ".xconf", ".xroles", ".roles", ".xsp", ".rss",
 115   ".xlog", ".xsamples", ".xtest", ".xweb", ".xwelcome",
 116   ".samplesxconf", ".samplesxpipe", ".svg", ".xhtml", ".gt", ".jx", ".jmx",
 117   ".jdo", ".orm", ".jdoquery",
 118   ".jxt", ".meta", ".pagesheet", ".stx", ".xegrm", ".xgrm", ".xlex", ".xmi",
 119   ".xsd", ".rng", ".rdf", ".rdfs", ".xul", ".tld", ".xxe", ".ft", ".fv",
 120   ".wsdd", ".wsdl", ".xlog", ".pom",
 121 );
 122 my @sgmlFileTypes = (
 123   ".dtd", ".mod", ".sgml", ".sgm",
 124 );
 125 my @htmlFileTypes = (
 126   ".html", ".htm", ".jsp", ".ihtml",
 127 );
 128 my @cFileTypes = (
 129   ".java", ".js", ".c", ".h", ".cpp", ".cc", ".cs", ".css", ".egrm", ".grm",
 130   ".javascript", ".jj", ".gy", ".g",
 131 );
 132 my @shFileTypes = (
 133   ".sh", ".ccf", ".pl", ".py", ".sed", ".awk",
 134 );
 135 my @propertiesFileTypes = (
 136   ".properties", ".rnc", ".rnx", ".prefs",
 137 # JDO extension
 138   ".conf", ".jdoproperties", ".list", ".sav",
 139 );
 140 my @dosFileTypes = (
 141   ".bat", ".cmd",
 142 );
 143 my @sqlFileTypes = (
 144   ".script", ".sql",
 145 );
 146 my @vmFileTypes = (
 147   ".vm",
 148 );
 149 my @ignoreFileTypes = (
 150   ".txt", ".dcl", ".ent", ".pen",
 151 # JDO extension
 152   ".mdl", ".uml",
 153 );
 154 my (%fileTypes, $fileType);
 155 foreach $fileType (@xmlFileTypes) {
 156   $fileTypes{$fileType}{type} = "xml";
 157   $fileTypes{$fileType}{openComment} = "<!--\n";
 158   $fileTypes{$fileType}{leaderComment} = "  ";
 159   $fileTypes{$fileType}{closeComment} = "-->\n";
 160   # insert after line 1 which must be the xml declaration
 161   $fileTypes{$fileType}{insertionPoint} = "1";
 162 }
 163 foreach $fileType (@sgmlFileTypes) {
 164   $fileTypes{$fileType}{type} = "sgml";
 165   $fileTypes{$fileType}{openComment} = "<!--\n";
 166   $fileTypes{$fileType}{leaderComment} = "  ";
 167   $fileTypes{$fileType}{closeComment} = "-->\n";
 168   # insert at very top of file
 169   $fileTypes{$fileType}{insertionPoint} = "0";
 170 }
 171 foreach $fileType (@htmlFileTypes) {
 172   $fileTypes{$fileType}{type} = "html";
 173   $fileTypes{$fileType}{openComment} = "<!--\n";
 174   $fileTypes{$fileType}{leaderComment} = "  ";
 175   $fileTypes{$fileType}{closeComment} = "-->\n";
 176   # insert at very top of file
 177   $fileTypes{$fileType}{insertionPoint} = "0";
 178 }
 179 foreach $fileType (@cFileTypes) {
 180   $fileTypes{$fileType}{type} = "C";
 181   $fileTypes{$fileType}{openComment} = "/*\n";
 182   $fileTypes{$fileType}{leaderComment} = "* ";
 183   $fileTypes{$fileType}{closeComment} = "*/\n";
 184   # insert at very top of file
 185   $fileTypes{$fileType}{insertionPoint} = "0";
 186 }
 187 foreach $fileType (@shFileTypes) {
 188   $fileTypes{$fileType}{type} = "sh";
 189   $fileTypes{$fileType}{openComment} = "\n";
 190   $fileTypes{$fileType}{leaderComment} = "# ";
 191   $fileTypes{$fileType}{closeComment} = "\n";
 192   # insert after line 1 which must be #! script invocation
 193   $fileTypes{$fileType}{insertionPoint} = "1";
 194 }
 195 foreach $fileType (@propertiesFileTypes) {
 196   $fileTypes{$fileType}{type} = "properties";
 197   $fileTypes{$fileType}{openComment} = "";
 198   $fileTypes{$fileType}{leaderComment} = "# ";
 199   $fileTypes{$fileType}{closeComment} = "\n";
 200   # insert at very top of file
 201   $fileTypes{$fileType}{insertionPoint} = "0";
 202 }
 203 foreach $fileType (@dosFileTypes) {
 204   $fileTypes{$fileType}{type} = "dos";
 205   $fileTypes{$fileType}{openComment} = "\@echo off\n";
 206   $fileTypes{$fileType}{leaderComment} = "rem ";
 207   $fileTypes{$fileType}{closeComment} = "\n";
 208   # insert at very top of file
 209   $fileTypes{$fileType}{insertionPoint} = "0";
 210 }
 211 foreach $fileType (@sqlFileTypes) {
 212   $fileTypes{$fileType}{type} = "sql";
 213   $fileTypes{$fileType}{openComment} = "";
 214   $fileTypes{$fileType}{leaderComment} = "-- ";
 215   $fileTypes{$fileType}{closeComment} = "\n";
 216   # insert at very top of file
 217   $fileTypes{$fileType}{insertionPoint} = "0";
 218 }
 219 foreach $fileType (@vmFileTypes) {
 220   $fileTypes{$fileType}{type} = "vm";
 221   $fileTypes{$fileType}{openComment} = "#*\n";
 222   $fileTypes{$fileType}{leaderComment} = "  ";
 223   $fileTypes{$fileType}{closeComment} = "*#\n";
 224   # insert after line 1 which must be the xml declaration
 225   $fileTypes{$fileType}{insertionPoint} = "1";
 226 }
 227 
 228 my ($countTotal, $countUnknownType, $countIgnoreType) = (0, 0, 0);
 229 my ($countXmlDeclMissing, $countInserted, $countAvoid) = (0, 0, 0);
 230 my ($countLicense, $countLicense10, $countLicense11, $countLicense12) = (0, 0, 0, 0);
 231 my ($countLicensePD, $countLicenseOther) = (0, 0);
 232 my ($countLicense20, $countLicense20a, $countLicense20b) = (0, 0, 0);
 233 
 234 # 3rdParty users of an Apache License
 235 my ($countLicenseF20, $countLicenseF11, $countLicenseF12) = (0, 0, 0);
 236 
 237 my $dualLicensesDetected = 0;
 238 my %uniqueSuffixes;
 239 my @avoidList;
 240 
 241 # read the avoidList
 242 if (defined($avoidList)) {
 243   open(INPUT, "<$avoidList") or die "Could not open input file '$avoidList': $!";
 244   while (<INPUT>) {
 245     next if (/^#/);
 246     chomp;
 247     push(@avoidList, $_);
 248   }
 249   close INPUT;
 250 }
 251 
 252 #--------------------------------------------------
 253 sub process_file {
 254   return unless -f && -T; # process only text files
 255   my $fileName = $File::Find::name;
 256   my ($file, $dir, $ext) = fileparse($fileName, qr/\.[^.]*/);
 257   return if ($dir =~ /\/CVS\//); # skip CVS directories
 258   return if ($dir =~ /\/\.svn\//); # skip SVN directories
 259   return if ($fileName =~ /.cvsignore/); # skip 
 260   return if ($file =~ /^\./); # skip hidden files
 261   foreach my $avoidFn (@avoidList) {
 262     if ($fileName =~ /$avoidFn/) {
 263       $countAvoid++;
 264       return;
 265     }
 266   }
 267   $countTotal++;
 268   if ($ext eq "") { $ext = "NoExtension"; }
 269   $uniqueSuffixes{$ext}++;
 270   print "$fileName, ";
 271   my $tmpFile = $fileName . ".tmp";
 272   open(INPUT, "<$fileName") or die "Could not open input file '$fileName': $!";
 273 
 274   # First do some tests on the file to ensure it does not already have a license
 275   # and ensure that XML files have an xml declaration.
 276   my ($existsLicense, $warnDualLicense, $existsXmlDecl) = (0, 0, 0);
 277   my ($warnAL20OldLicense) = 0;
 278   my ($warnAL20aOldLicense) = 0;
 279   my $licenseType = "";
 280   undef $/;  # slurp the whole file
 281   my $content = <INPUT>;
 282   # we want our matches to happen only in the top part of the file
 283 # NOTE: You may want to relax this from time-to-time to find
 284 # all possible dual-license issues.
 285   my $headContent = substr($content, 0, 1500);
 286   $headContent =~ s/[ \t]+/ /g;
 287 
 288   # detect various existing licenses
 289   LICENSE_CASE: {
 290     if ($headContent =~ /Licensed to the Apache Software Foundation \(ASF\) under/) {
 291       $existsLicense = 1; $countLicense++;
 292       $countLicense20b++; $licenseType = "AL-20b";
 293       last LICENSE_CASE;
 294     }
 295     if ($headContent =~ /Licensed under the Apache License.*Version 2.0/) {
 296       $existsLicense = 1; $countLicense++;
 297       if ($headContent =~ /Apache Software Foundation or its licensors/) {
 298         $countLicense20a++; $licenseType = "AL-20a";
 299         $warnAL20aOldLicense = 1;
 300       }
 301       else {
 302         if ($headContent =~ /Copyright.*Apache Software Foundation/) {
 303           $countLicense20++; $licenseType = "AL-20";
 304           $warnAL20OldLicense = 1;
 305         }
 306         else {
 307           $countLicenseF20++; $licenseType = "F-AL-20";
 308         }
 309       }
 310       last LICENSE_CASE;
 311     }
 312     if ($headContent =~ /The Apache Software License.*Version 1.2/) {
 313       $existsLicense = 1; $countLicense++;
 314       if ($headContent =~ /Copyright.*Apache Software Foundation/) {
 315         $countLicense12++; $licenseType = "AL-12";
 316       }
 317       else {
 318         $countLicenseF12++; $licenseType = "F-AL-12";
 319       }
 320       last LICENSE_CASE;
 321     }
 322     if ($headContent =~ /The Apache Software License.*Version 1.1/) {
 323       $existsLicense = 1; $countLicense++;
 324       if ($headContent =~ /Copyright.*Apache Software Foundation/) {
 325         $countLicense11++; $licenseType = "AL-11";
 326       }
 327       else {
 328         $countLicenseF11++; $licenseType = "F-AL-11";
 329       }
 330       last LICENSE_CASE;
 331     }
 332     if ($headContent =~ /Copyright.*The Apache Group/) {
 333       $countLicense10++; $licenseType = "AL-10";
 334       $existsLicense = 1; $countLicense++;
 335       last LICENSE_CASE;
 336     }
 337     if ($headContent =~ /Public Domain.*/i) {
 338       $countLicensePD++; $licenseType = "PublicDomain";
 339       $existsLicense = 1; $countLicense++;
 340       last LICENSE_CASE;
 341     }
 342     # catchall
 343     if ($headContent =~ /Copyright|\(c\)/i) {
 344       # do process xml files that have a copyright attribute
 345       last LICENSE_CASE if ($headContent =~ /copyright=/i);
 346       # do process DTD files that have a copyright attribute
 347       last LICENSE_CASE if ($headContent =~ /copyright CDATA/i);
 348       # do process css files that have a .copyright section
 349       last LICENSE_CASE if ($headContent =~ /\.copyright/i);
 350       # do process files that just talk about copyright
 351       last LICENSE_CASE if ($headContent =~ /copyright statement/i);
 352       $countLicenseOther++; $licenseType = "Other";
 353       $existsLicense = 1; $countLicense++;
 354       last LICENSE_CASE;
 355     }
 356     # catchall
 357     if ($headContent =~ /re[ -]*distribut/i) {
 358       $countLicenseOther++; $licenseType = "Other";
 359       $existsLicense = 1; $countLicense++;
 360       last LICENSE_CASE;
 361     }
 362   }
 363 
 364   # Try to detect if a new AL-20 license has been accidently inserted
 365   # as well as having some other license.
 366   # FIXME: If a practice run reveals more types of Foregin copyright
 367   # then add patterns here.
 368   if ($licenseType =~ /AL-20/) {
 369     if (($headContent =~ /Rights Reserved/i) ||
 370         ($headContent =~ /Public Domain/i) ||
 371         ($headContent =~ /Copyright.*Copyright/i)) {
 372       $warnDualLicense = 1; $dualLicensesDetected++;
 373     }
 374   }
 375 
 376   # ensure that xml files have an xml declaration
 377   if ($headContent =~ /^<\?xml/) { $existsXmlDecl = 1; }
 378 
 379   $/ = "\n"; # reset input record separator
 380 
 381   my $recognisedFileType = 0; my $thisFileType = "unknown";
 382   foreach $fileType (keys %fileTypes) {
 383     if ($fileType eq $ext) {
 384       $recognisedFileType = 1;
 385       $thisFileType = $fileTypes{$fileType}{type};
 386       last;
 387     }
 388   }
 389   print "extension=$ext, fileType=$thisFileType, ";
 390   if (!$existsXmlDecl && ($thisFileType eq "xml")) {
 391     print "XML file does not have XML Declaration so skipping\n";
 392     $countXmlDeclMissing++;
 393     return;
 394   }
 395   if ($existsLicense) {
 396     if ($licenseType !~ /^AL/) { print "WARN: "; }
 397     print "Found existing license (licenseType=$licenseType) so skipping";
 398     if ($warnAL20OldLicense) { print ", WARN: old AL-20 copyright notice"; }
 399     if ($warnAL20aOldLicense) { print ", WARN: old AL-20a copyright notice"; }
 400     if ($warnDualLicense) { print ", WARN: dual license"; }
 401     print "\n";
 402     return;
 403   }
 404   foreach $fileType (@ignoreFileTypes) {
 405     if ($fileType eq $ext) {
 406       $countIgnoreType++;
 407       print "ignored, ";
 408     }
 409   }
 410   if (!$recognisedFileType) {
 411     print "File type '$ext' is not recognised so skipping\n";
 412     $countUnknownType++;
 413     return;
 414   }
 415 
 416   # Now process the file.
 417   my $insertionDone = 0; my ($line, $thisLine);
 418   if (!$opt_p) {
 419     open(OUTPUT, ">$tmpFile")
 420       or die "Could not open output file '$tmpFile': $!";
 421   }
 422   $countInserted++;
 423   if ($fileTypes{$ext}{insertionPoint} == 0) {
 424     print "Insert new license\n";
 425     if (!$opt_p) {
 426       print OUTPUT $fileTypes{$ext}{openComment};
 427       foreach $line (@license) {
 428         $thisLine = $fileTypes{$ext}{leaderComment} . $line;
 429         $thisLine =~ s/\s+$//;
 430         print OUTPUT $thisLine, "\n";
 431       }
 432       print OUTPUT $fileTypes{$ext}{closeComment};
 433     }
 434     $insertionDone = 1;
 435   }
 436   seek(INPUT, 0, 0); $. = 0; # rewind to top of file
 437   while (<INPUT>) {
 438     if (!$opt_p) {
 439       print OUTPUT $_ or die "Could not write output file '$fileName': $!";
 440     }
 441     if (!$insertionDone) {
 442       if ($. == $fileTypes{$ext}{insertionPoint}) {
 443         print "Insert new license\n";
 444         if (!$opt_p) {
 445           print OUTPUT $fileTypes{$ext}{openComment};
 446           foreach $line (@license) {
 447             $thisLine = $fileTypes{$ext}{leaderComment} . $line;
 448             $thisLine =~ s/\s+$//;
 449             print OUTPUT $thisLine, "\n";
 450           }
 451           print OUTPUT $fileTypes{$ext}{closeComment};
 452         }
 453         $insertionDone = 1;
 454       }
 455     }
 456   }
 457   close INPUT or die "Could not close input file '$fileName': $!";
 458   if (!$opt_p) {
 459     close OUTPUT or die "Could not close output file '$tmpFile': $!";
 460     rename($tmpFile, $fileName);
 461   }
 462 }
 463 find(\&process_file, $startDir);
 464 
 465 #--------------------------------------------------
 466 # Report some statistics
 467 my $statsMsg = "were";
 468 if ($opt_p) { $statsMsg = "would be"; }
 469 $countUnknownType -= $countIgnoreType;
 470 print STDERR qq!
 471 Total $countTotal text files were investigated.
 472 New licenses $statsMsg inserted in $countInserted files.
 473 Skipped $countLicense files with an existing license:
 474  (Apache v2.0=$countLicense20, v2.0a=$countLicense20a, v2.0b=$countLicense20b)
 475  (Apache v1.2=$countLicense12, v1.1=$countLicense11, v1.0=$countLicense10)
 476  (Other=$countLicenseOther, PublicDomain=$countLicensePD)
 477  (3rdParty using AL v2.0=$countLicenseF20, v1.2=$countLicenseF12, v1.1=$countLicenseF11)
 478 Skipped $countXmlDeclMissing XML files with missing XML Declaration.
 479 !;
 480 if (defined($avoidList)) {
 481   print STDERR "Avoided $countAvoid files as specified in the avoidList\n";
 482 }
 483 print STDERR qq!
 484 Ignored $countIgnoreType files of specified type (@ignoreFileTypes)
 485 Skipped $countUnknownType files of unknown type.
 486 !;
 487 if ($dualLicensesDetected) {
 488   print STDERR qq!
 489 WARNING: $dualLicensesDetected files had another license as well as the new
 490 Apache v2.0 license. (Scan the log output for lines with "WARN: dual".)
 491 !;
 492 }
 493 my $suffix;
 494 if ($countUnknownType > 0) {
 495   print STDERR qq!
 496 List of unknown filename extensions and ignored filename extensions:
 497 (Add new fileTypes to this script if you want them to be catered for.)
 498 !;
 499   foreach $suffix ( sort keys %uniqueSuffixes) {
 500     my $suffixKnown = 0;
 501     foreach $fileType (keys %fileTypes) {
 502       if ($suffix eq $fileType) { $suffixKnown = 1; }
 503     }
 504     if (!$suffixKnown) {
 505       print STDERR "$suffix=$uniqueSuffixes{$suffix} ";
 506     }
 507   }
 508   print STDERR "\n\n";
 509 }
 510 print STDERR "List of all unique filename extensions:\n";
 511 foreach $suffix ( sort keys %uniqueSuffixes) {
 512   print STDERR "$suffix=$uniqueSuffixes{$suffix} ";
 513 }
 514 print STDERR "\n\n";
 515 if ($opt_p) { print STDERR "Finished practice run.\n"; }
 516 
 517 #==================================================
 518 # ShowUsage
 519 #==================================================
 520 
 521 sub ShowUsage {
 522   print STDERR qq!
 523 Usage: $0 [-h] [-p] startDir [avoidList] > logfile
 524                                                                                 
 525   where:
 526   startDir = The SVN directory (pathname) to start processing. Will descend.
 527   avoidList = List of files and directories to avoid, one per line.
 528 
 529   option:
 530   h = Show this help message.
 531   p = Do a practice run. Do not write any files.
 532 
 533 !;
 534 }

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

You are not allowed to attach a file to this page.