#!/bin/bash # # 'idnits' looks for violations of Section 2.1 and 2.2 of the # requirements listed on http://www.ietf.org/ID-Checklist.html # # Release notes: # # 21 Aug 2005 v1.77 - In verbose mode, added indication of # where in a paragraph boilerplate mismatch was # found. # # 21 Aug 2005 v1.76 - Bugfix: Formfeeds occuring on the same # line as the page numbers were not counted, which # could result in erroneous warnings about # missing page breaks for some documents. # # 14 Jul 2005 v1.75 - Added a "Draft ..." as an acceptable # variation on page header. # # 21 Jun 2005 v1.74 - Better tests for max and min values of # COLUMNS. # # 16 Jun 2005 v1.73 - Fixed various nits in the tool, after # comments from Bruce Lilly: # * Adapting to terminal width is now only done # up to a maximum width of 76, and down to 16 # * Some grammar corrections # * The column width setting, whether given by # the --width switch, by setting COLUMNS in the # environment, or by running in a narrow terminal # window, now also applies to boilerplate text # * The line folding algorithm has been enhanced # to not give up as easily # * All normal output is now folded to match the # width setting - previously some output lines # didn't. Debug output is not folded. # * Added additional accepted variations on # the names of normative and informative sections # # 17 May 2005 v1.72 - Added a check for correct year in # copyright boilerplate. # # 02 May 2005 v1.71 - Changed warnings for non-3978 # boilerplate to errors # # 23 Apr 2005 v1.70 - Changes to discover 3667 boilerplate # also when preceded by "This document is an # Internet-Draft and ..." (so that a warning # about imminent expiration can be issued). # # 09 Apr 2005 v1.69 - In function lookfor(): revert from # "which" to "type -p" # # 08 Apr 2005 v1.68 - Wording tweaks. # # 08 Apr 2005 v1.67 - Added hyphenation warnings again, if it # has been possible to extract a list of # hyphenated words from the draft. 
Any fragment # of a hyphenated word will be accepted on a # line-ending, but not hyphenated line-breaks # which have a line-ending fragment which # doesn't occur in a hyphenated word somewhere # else in the document. # # 08 Apr 2005 v1.66 - "Weird spacing" warnings could result # from a long title in the header line, which is # a mistake. Fixed. Also tweaked the code to # find the awk executable in order to provide a # default value for the case when 'which' is not # available on the target system. Started some # work on extracting a list of valid words from # the document, for use in validation of # hypenated line-breaks. # # 06 Apr 2005 v1.65 - Disabled hyphenation warnings also in verbose # mode. One slight tweak to suit cygwin better. # # 05 Apr 2005 v1.64 - Para. 2 of the 3978 Section 5.4 # boilerplate wasn't recognised when occuring in # a separate paragraph. Fixed, thanks to Bruce # Lilly. # # 04 Apr 2005 v1.63 - A warning has been added for drafts # which use 3667 boilerplate instead of 3978 # boilerplate (where those differ). The 3978 # boilerplate is now also required to be # verbatim, with none of the slight variations # which was accepted earlier (e.g., "he" in stead # of "he or she", or "the author" instead of # "each author"). # # 09 Mar 2005 v1.62 - RFC 3978 has the copyright notice and # the reference to BCP 78 in separate paragraphs. # Fixed this. Also added "export LC_ALL=C" to # guard against bugs due to untested locale # settings. # # 02 Mar 2005 v1.61 - Changed references to 3667/3668 to 3978/3979 # # 21 Feb 2005 v1.60 - Disabled the hyphenation warnings. The new # xml2rfc (v1.29rc3) does line-breaking on hyphens, # which is good, but makes it necessary to find a more # sophisticated algorithm in order not to produce # too many spurious warnings. # # 18 Dec 2004 v1.59 - Added normalisation of draft indentation, # to be less sensitive to oddly indented drafts. 
# # 18 Dec 2004 v1.58 - cygwin doesn't by default understand # the 'which' command - replaced it with 'type -p'. # # 11 Dec 2004 v1.57 - Fixed a line folding bug and corrected the # boilerplate output. # # 10 Dec 2004 v1.56 - Tweaked the boilerplate-matching # regexps a bit, to cover more valid variants. # (largely from a patch from Bruce Lilly) # # 10 Dec 2004 v1.55 - Fixed a rather annoying bug introduced # in 1.54, which resulted in the web-service # printing the full environment and program code # before getting around to printing the proper # output... # # 09 Dec 2004 v1.54 - Quite a bit of refactoring, and some # new features. # * If output is to a tty, line-wrapping of error # and warning messages is now done based on the # width (number of columns) of the controlling # tty, as reported by `stty size`. # * Indentation has been changed somewhat, and # nits are now prefaced with an asterisk (*) # while warnings are prefaced with a dash (-). # * Fixed an error where some tests would not be # run if multiple files were given as input, and # the first would have an rfc file name. # * Fixed several problems associated with using # nawk instead of gawk as awk interpreter. # Running under stock solaris, without gawk, should # now work. # * Added final full stop to a lot of sentences # * Fixed grammar of messages, so single and plural # instances of error and warnings get appropriate # verb and noun conjugations. # * Added a "--width " switch, mostly in # order to control regression testing. It # overrides the detected terminal width. # # 05 Dec 2004 v1.53 - Permitted IANA Considerations to be placed # as an appendix. # # 02 Dec 2004 v1.52 - Some minor message text changes. Added # regression tests so that now the Makefile runs # tests on darwin, linux and solaris for each new # release. Cygwin tests will come when I have a # windows box online. 
As a result, some issues # were discovered for solaris: # * The default awk is the original awk, so we # need to search for gawk and nawk. The # original awk unfortunately won't do. # * Even with nawk, there are some issues - # those have been fixed, so this version should # run on at least solaris 8 and higher without # any problems. # # 26 Nov 2004 v1.51 - Some restructuring and changes of # output layout to better present checks against # multiple requirements documents. Added # 1id_guidelines.txt check for: # * Additional required boilerplate # # 22 Nov 2004 v1.50 - Started process of adding checks for # all checkable nits - not only those from # ID-Checklist.html, but also: # rfc 2223 # draft-rfc-editor-rfc2223bis-08 # http://www.ietf.org/ietf/1id-guidelines.txt # # In this version, added the following: # # 1id-guidelines.txt: # * max 58 lines per page, followed by form feed # # [ The section on formatting is not checked by # this version, but is planned for a later version. # as are these items: # - "INTERNET-DRAFT" should appear in the upper # left-hand corner of the first page # - Does not use any of the terms Standard, # Proposed, Draft, Experimental, Historic, # Required, Recommended, Elective, or # Restricted in the title ] # # 15 Nov 2004 v1.49 - Re-wrote the linebreaking regexp as a # function to avoid awk complaints under OS X. # # 14 Nov 2004 v1.48 - Removed regexp metachars from the text # shown as expected boilerplate in verbose mode. # Improved the handling of whitespace when # comparing draft text with boilerplate. Other # minor tweaks. # # 02 Nov 2004 v1.47 - The checknits awk script has become too # long for the command line of cygwin. Adding # workaround by placing it in a temporary file. # # 25 Oct 2004 v1.46 - Added "Contact Information" as an # alternative section name of the Authors' # Addresses section # # 20 Oct 2004 v1.45 - Added check for IANA considerations section. # # 01 Oct 2004 v1.44 - Minor tweak of Table of Contents regexp. 
# # 24 Sep 2004 v1.43 - Changed base assumption for boilerplate # compliance to be that the draft is complying # with 3667, rather than 2026. # # 24 Sep 2004 v1.42 - Fixed a bug introduced in v1.31, where # some verbose output was never emitted. # Removed reporting of 2026 deficiencies, unless # the --rfc2026 option is given. # # 23 Sep 2004 v1.41 - Doing case-independent checks of # section names. # # 20 Sep 2004 v1.40 - Minor tweak of page header/footer # stripping # # 18 Sep 2004 v1.39 - Improved the page header/footer # stripping to handle more cases of paragraphs # split over page breaks, and a greater variety # of whitespace in the page break. Tweaked the # verbose output format slightly. # # 18 Sep 2004 v1.38 - Added a tiny tweak to the rfc3667_claim # regexp, to make paragraph merging work for it. # idnits now correctly validates the output of # xml2rfc 1.25, which merges this claim with the # 3667 section 5.1 IPR Disclosure Ack. # # 17 Sep 2004 v1.37 - Added more diagnostic information for the # case where there is a boilerplate mismatch, but # the start of the boilerplate matches: Now # showing both the expected pattern and the found # text. Tweaked the header/footer stripping # regexps to handle more variations. # # 12 Sep 2004 v1.36 - Major overhaul of boilerplate matching # mechanism. idnits is now attempting to match # run-together paragraphs containing multiple individual # boilerplate elements. This should make it correctly # accept more documents. Most basic examples of # run-together boilerplate should be handled with this # release. # # 08 Sep 2004 v1.35 - Added some newlines for nicer verbose # mode output. # # 28 Jul 2004 v1.34 - Not warning for capital 'O' in "Table # Of Contents". Started restructuring needed to # support detection of run-together boilerplate # paragraphs. Added output of expected # boilerplate pattern in --verbose mode when # boilerplate mismatch found. 
# # 09 Jul 2004 v1.33 - Added new alternative boilerplate for # rfc 3667 Section 5, para 1, and fixed a bug in # detecting bad boilerplate for this paragraph. # # 02 Jul 2004 v1.32 - Disabled rfc2026 acceptance. Added # "or will be disclosed, " to the initial 3667 IPR # statement. # # 29 May 2004 v1.31 - Added check for run-together boilerplate # paragraphs (first stage - not complete yet). Changed # help text to reflect the new ID-checklist document # which replaces ID-nits. Made the line numbers # reported in verbose mode come out correct. Changed # output format for verbose mode to be editor-friendly. # # 14 May 2004 v1.30 - Further changes to work correctly also # on *BSD and Solaris. # # 13 May 2004 v1.29 - Changed the reporting of the filename # and the invocation of Awk in order to work # under {Open|Free}BSD # # 13 May 2004 v1.28 - Added a check for number of lines to # the missing ToC test, for drafts which don't # have page breaks. Tweaked the Author's address # detection. # # 09 May 2004 v1.27 - Some changes in wording of warnings # regarding 2026 vs. 3667/3668 boilerplate # # 21 Apr 2004 v1.26 - # * Not complaining about too long lines when the # excess is only blank space. # * Added --verbose switch # * Reduced complaints about # - too long lines # - non-ascii characters # - hyphenation # - control characters # - spacing # to a single line when not --verbose used # * Added a warning for claiming conformance with # 2026 but using 3667/8 boilerplate # * Lines with "........" before intro indicates # Table of Contents # * Now permitting "Security Consideration" (no 's') # * Minor code tweaks # # 20 Apr 2004 v1.25 - Minor tweaks # # 15 Apr 2004 v1.24 - The test for numbered reference to RFC 2026 # having been added to the boilerplate was not quite # right. Changed, and the diagnostic message # updated. Fixed page counting which was broken # by the v1.23 changes. 
# # 15 Apr 2004 v1.23 - # * Added stripping of header/footer lines, in # order to be able to correctly recognize boilerplate # which has been split across pages. # * Improved regexps for boilerplate paragraphs. # * Added notes when a boilerplate section doesn't # match, but the start of it matches - could be just a # boilerplate error then. # * Did a little code refactoring. # # 08 Apr 2004 v1.22 - Another take on the 3667 vs. 2026 issue. This # version observes whether there are 2026 compliant # sections in the document, and if so notes this and # verifies against 2026, otherwise it verifies against # 3667. The --no3667 and --rfc3667 switches are no-op's # in this version. # # 08 Apr 2004 v1.21 - Added tests for the notices required by RFC # 2026. The handling of 3667 vs. 2026 notices is not # necessarily perfect - feedback from users would be # good... :-) # # Currently, by default idnits looks for 3667 # compliance. If it doesn't find it, but finds # equivalent 2026 paragraphs, it notes this when # pointing out the 3667 non-compliance. Checking for # 2026 compliance instead of 3667 compliance can be # done by specifying --no3667. # # 08 Apr 2004 v1.20 - Minor tweaks. # # 07 Apr 2004 v1.19 - Added tests for the notices required by # RFC 3667 and 3668. # # 07 Apr 2004 v1.18 - Case independent matching of section # names is now used when the --nowarn flag has # been specified. This functionality requres the # awk used to be 'gawk' - other implementations # of awk will most likely still do case-dependent # matching. # # 06 Apr 2004 v1.17 - tweaked the regexp for weird spacing a bit, # to avoid trigging on lists with 'o' bullets. Changed # the usage and help output somewhat. # # 06 Apr 2004 v1.16 - # * idnits is now a SHELL script which starts awk, # rather than being an awk script directly. This # change was done in order to permit giving option # switches directly, without having to add "--" before # the switches. # * added --nowarn option. 
With this option, # only nits according to the ID-nits page are reported. # Without it, some warnings about misspelling etc. are # given. # * Added test for unknown options # * Also fixed a bug resulting in incorrect line # numbers when processing multiple files. # # 06 Apr 2004 v1.15 - Added a slew of new variations on the name of # the Introduction section, and some other section name # variations. All warnings now occurring when run on # RFC 3600-3742 now seem to be correct and make sense. # Also zeroed all counters on new filename - required # for correct operation on multiple files... # # 06 Apr 2004 v1.14 - Accepting another variant on 'Table of # Contents' spelling. Added a warning for misspelling, # though. # # 04 Apr 2004 v1.13 - Added diagnostic for another misspelling of # Authors' Address(es) # # 03 Apr 2004 v1.12 - Added another test for table of contents. # Tweaked some diagnostics' wording. # # 02 Apr 2004 v1.11 - Added printing of program version as first # output line. # # 02 Apr 2004 v1.10 - Now accepting more variations of the names of # required sections. Added missing newline after # hyphenation warning. Refined test for split # references. # # 01 Apr 2004 v1.09 - Some tests did not work with files # having DOS line ending - fixed. Added more # informative diagnostics for non-ascii characters # and control characters. # # 30 Mar 2004 v1.08 - Refined the test for normative / # informative references somewhat. Added the # ability to sensibly handle the case of multiple # files given on the command line. Added "No nits # found" output when no nits were found. # # 18 Mar 2004 v1.07 - Handling some cases of misspelling. # Some versions of gawk can't handle \000 in # patterns, changed that to \001 on line 137. # Only requiring the first Abstract / Status of # Memo section to be un-numbered.
# # 16 Mar 2004 v1.06 - Added test for normative/informative reference # section split (code contributed by Pekka Savola) # # 15 Mar 2004 v1.05 - Fixed incorrectly quoted single # apostrophes which messed up the test for # Author's addresses # # 26 Feb 2004 v1.04 - Added --help output # # 24 Feb 2004 v1.03 - Added checks for line-break # hyphenation, numbered Abstract / Status of Memo # section, presence of required sections, some # 3667 checks. # # 24 Feb 2004 v1.02 - Added check for unreasonable spacing # # 23 Feb 2004 v1.01 - Added check for non-ascii characters # # 8 Oct 2003 v1.00 - Inherited from whereabouts unknown # # Copyright: # ----------------------------------------------------------------- # # Copyright 2002-2004 Henrik Levkowetz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # ----------------------------------------------------------------- version="1.77"; progdate=""; export LC_ALL=C # ---------------------------------------------------------------------- # Utility to find an executable # ---------------------------------------------------------------------- lookfor() { default="$1"; shift for b in "$@"; do found=$(type -p "$b" 2>/dev/null) if [ -n "$found" ]; then if [ -x "$found" ]; then echo "$found" return fi fi done echo "$default" } AWK=$(lookfor gawk gawk nawk awk) # ---------------------------------------------------------------------- # Strip headers and footers, end-of-line whitespace and \r (CR) # ---------------------------------------------------------------------- strip() { $AWK ' BEGIN { longestpage = 1; textcolumn=8; # initialise minimum indentation we found } { gsub(/\r/, ""); } { gsub(/[ \t]+$/, ""); } { pagelength++; } /\[?[Pp]age [0-9ivx]+\]?[ \t\f]*$/ { match($0, /[Pp]age [0-9ivx]+/); num = substr($0, RSTART+5, RLENGTH-5); if (num+0 > maxpage) maxpage = num+0; pagecount++; countedpage=1; if (pagelength > 58) longpagecount++; if (maxlength < pagelength) { maxlength = pagelength; longestpage = num; } pagelength = 0; } /\f/ { newpage=1; ffcount++; if (pagelength > 58) longpagecount++; if (! 
countedpage) { pagecount++; countedpage=0; } if (maxlength < pagelength) { maxlength = pagelength; longestpage = pagecount; } pagelength=1; next; } /\[?[Pp]age [0-9ivx]+\]?[ \t\f]*$/ { next; } /^ *Internet.Draft.+[0-9][0-9][0-9][0-9] *$/ { newpage=1; next; } /^ *INTERNET.DRAFT.+[0-9][0-9][0-9][0-9] *$/ { newpage=1; next; } /^ *Draft.+[0-9][0-9][0-9][0-9] *$/ { newpage=1; next; } /^RFC.+[0-9]+$/ { newpage=1; next; } /^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$/ { newpage=1; next; } /^[^ \t]/ { sentence=1; } /[^ \t]/ { if (newpage) { if (sentence) { outline++; print ""; } } else { if (haveblank) { outline++; print ""; } } haveblank=0; sentence=0; newpage=0; line = $0; sub(/^ *\t/, " ", line); thiscolumn = match(line, /[^ ]/); if (thiscolumn && thiscolumn < textcolumn) textcolumn = thiscolumn; } /[.:][ \t]*$/ { sentence=1; } /^[ \t]*$/ { haveblank=1; next; } { outline++; print; } END { if (pagecount == 0) pagecount = 1; if (longpagecount == 0 && pagelength > 58) longpagecount++; print ""; printf "-+- Pagecount: %d -+-\n", (pagecount > maxpage ? pagecount : maxpage); printf "-+- Maxpagelength: %d -+-\n", (pagelength > maxlenght ? pagelength : maxlength); printf "-+- Longpagecount: %d -+-\n", longpagecount; printf "-+- Longestpage: %d -+-\n", longestpage; printf "-+- Formfeedcount: %d -+-\n", ffcount; printf "-+- Indentation: %d -+-\n", textcolumn-1; } ' $1 } tmpfile() { prefix=$(basename $0) for tmpdir in $TMPDIR $TMP $TEMP /tmp .; do if [ -d $tmpdir -a -w $tmpdir ]; then tmpfn=$tmpdir/$prefix-$$.tmp echo $tmpfn exit fi done if [ -z $tmpfn ]; then echo "Can't find any writable directory for temporary files; this won't work..." > /dev/stderr fi } checknits() { program=$(tmpfile) cat << 'EOF' > $program BEGIN { option_verbose = 0; option_warn = 1; split(ENVIRON["CHECKNITS"], argv); columns = ENVIRON["COLUMNS"]; if (! 
columns ) columns = 80; if (columns > 80) columns = 80; if (columns < 16) columns = 16; if (columns < 73) bpcols = columns; else bpcols = 73; for (i in argv) { # Deprecated if (argv[i] == "--rfc3667") { printf "\nOption %s does nothing any more\n", argv[i]; argv[i] = ""; } if (argv[i] == "--no3667") { printf "\nOption %s does nothing any more\n", argv[i]; argv[i] = ""; } if (argv[i] == "--rfc2026") { printf "\nOption %s does nothing any more\n", argv[i]; argv[i] = ""; } if (argv[i] == "--nowarn") { option_warn = 0; argv[i] = ""; } if (argv[i] == "--filename") { option_filename = argv[i+1]; argv[i] = ""; argv[i+1] = ""; } if (argv[i] == "--debug") { option_debug = 1; argv[i] = ""; } if (argv[i] == "--nitcount") { option_nitcount = 1; argv[i] = ""; } if (argv[i] == "--pass1") { option_pass1 = 1; argv[i] = ""; } if (argv[i] == "--verbose") { option_verbose++; argv[i] = ""; } if (argv[i] == "--list-matches") { option_list_matches = 1; argv[i] = ""; } if (argv[i] == "--") { argv[i] = ""; } if (argv[i] ~ "^-.+") { printf "\nUnknown option: %s\n\n", argv[i]; usage(); exit; } if (argv[i] ~ "^--.+") { printf "\nUnknown option: %s\n\n", argv[i]; usage(); exit; } } option_pass2 = ! 
option_pass1; split("SOH STX ETX EOT ENQ ACK BEL BS TAB LF VT FF CR SO SI DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US", controlchar) for (i in controlchar) { controlchar[sprintf("%c",i+0)] = controlchar[i]; del controlchar[i]; } # Extract a list of hyphenated words from the document, and build an array of valid fragments if ( length(option_filename) ) { cmd = sprintf("cat %s | sed 's/[^A-Za-z0-9_-]/\\n/g' | sort | uniq | egrep -- '-' | egrep -v -- '-$' | egrep '^[A-Za-z0-9]'", option_filename ) while ( cmd | getline > 0 ) { word = $0 frag = "" for (;;) { if ( length(word) == 0 ) break pos = index(word, "-") if ( pos == 0 ) break frag = frag substr(word, 1, pos ) hyphenfrags[frag] has_hyphenlist = 1 word = substr(word, pos+1) } } } # get current year if ( "date +'%Y'" | getline year <= 0 ) { year = "[0-9]+" } has[""]; # make sure this exists as a global variable here # ------------------------------------------------------------------ # 3978 and 3979 section texts # bp["rfc3667_5_1"] = "By submitting this Internet-Draft, (I|we) certify that any applicable " \ "patent or other IPR claims of which (I am|we are) aware have been disclosed,( or " \ "will be disclosed,)? and any of which (I|we) become aware will be disclosed, " \ "in accordance with RFC 3668.$"; bp["rfc3978_5_1"] = "By submitting this Internet-Draft, each author represents that any " \ "applicable patent or other IPR claims of which he or she is aware " \ "have been or will be disclosed, and any of which he or she becomes " \ "aware will be disclosed, in accordance with Section 6 of BCP 79.$"; bp["rfc3978_5_1a"] = "By submitting this Internet-Draft, (each|the) author represents that any " \ "applicable patent or other IPR claims of which (he or she|he|she) is aware " \ "have been or will be disclosed, and any of which (he or she|he|she) becomes? " \ "aware will be disclosed, in accordance with( Section 6 of)? 
(BCP 79|RFC 3979|RFC 3668).$"; bp["rfc3978_5_2a"] = "This document may not be modified, and derivative works of it may " \ "not be created, except to publish it as an RFC and to translate it " \ "into languages other than English.$"; bp["rfc3978_5_2ax"] = "This document may not be modified, and derivative works of it may " \ "not be created, except to publish it as an RFC and to translate it " \ "into languages other than English other than to extract section " \ "[0-9.]+ as-is for separate use.$"; bp["rfc3978_5_2b"] = "This document may not be modified, and derivative works of it may " \ "not be created.$"; bp["rfc3978_5_2bx"] = "This document may not be modified, and derivative works of it may " \ "not be created other than to extract section [0-9.]+ as-is for separate use.$"; bp["rfc3978_5_3"] = "This document may only be posted in an Internet-Draft.$"; bp["rfc3978_5_4p1"] = sprintf("Copyright \\(C\\) The Internet Society \\(?%s\\)?.$", year); bp["rfc3978_5_4p2"] = "This document is subject to the rights, licenses and restrictions contained in BCP " \ "78, and except as set forth therein, the authors retain all their rights.$"; bp["rfc3978_5_5"] = "This document and the information contained herein are provided " \ "on an \"AS IS\" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE " \ "REPRESENTS OR IS SPONSORED BY \\(IF ANY\\), THE INTERNET SOCIETY AND " \ "THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, " \ "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT " \ "THE USE OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR " \ "ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A " \ "PARTICULAR PURPOSE.$"; bp["rfc3979_5p1"] = "The IETF takes no position regarding the validity or scope of any " \ "Intellectual Property Rights or other rights that might be claimed " \ "to pertain to the implementation or use of the technology " \ "described in this document or the extent to which any license " \ "under such rights might 
or might not be available; nor does it " \ "represent that it has made any independent effort to identify any " \ "such rights. Information on the procedures with respect to rights " \ "in RFC documents can be found in BCP 78 and BCP 79.$"; bp["rfc3979_5p2"] = "Copies of IPR disclosures made to the IETF Secretariat and any " \ "assurances of licenses to be made available, or the result of an " \ "attempt made to obtain a general license or permission for the use " \ "of such proprietary rights by implementers or users of this " \ "specification can be obtained from the IETF on-line IPR repository " \ "at http://www.ietf.org/ipr.$"; bp["rfc3979_5p3"] = "The IETF invites any interested party to bring to its attention " \ "any copyrights, patents or patent applications, or other " \ "proprietary rights that may cover technology that may be required " \ "to implement this standard. Please address the information to the " \ "IETF at ietf-ipr@ietf.org.$"; bp["rfc2026_10_4A"] = "The IETF takes no position regarding the validity or scope of " \ "any intellectual property or other rights that might be claimed " \ "to pertain to the implementation or use of the technology " \ "described in this document or the extent to which any license " \ "under such rights might or might not be available; neither does " \ "it represent that it has made any effort to identify any such " \ "rights. Information on the IETF\047s procedures with respect to " \ "rights in standards-track and standards-related documentation " \ "can be found in BCP-11. 
Copies of claims of rights made " \ "available for publication and any assurances of licenses to " \ "be made available, or the result of an attempt made " \ "to obtain a general license or permission for the use of such " \ "proprietary rights by implement[oe]rs or users of this " \ "specification can be obtained from the IETF Secretariat.$"; bp["rfc2026_10_4B"] = "The IETF invites any interested party to bring to its " \ "attention any copyrights, patents or patent applications, or " \ "other proprietary rights which may cover technology that may be " \ "required to practice this standard. Please address the " \ "information to the IETF Executive Director.$"; bp["rfc2026_10_4Cp1"] = "Copyright \\([Cc]\\) The Internet Society \\(?[0-9]+\\)?. All Rights " \ "Reserved.$"; bp["rfc2026_10_4Cp2"] = "This document and translations of it may be copied and " \ "furnished to others, and derivative works that comment on or " \ "otherwise explain it or assist in its implementation may be " \ "prepared, copied, published and distributed, in whole or in " \ "part, without restriction of any kind, provided that the above " \ "copyright notice and this paragraph are included on all such " \ "copies and derivative works. 
However, this document itself may " \ "not be modified in any way, such as by removing the copyright " \ "notice or references to the Internet Society or other Internet " \ "organizations, except as needed for the purpose of developing " \ "Internet standards in which case the procedures for copyrights " \ "defined in the Internet Standards process must be followed, or " \ "as required to translate it into languages other than English.$"; bp["rfc2026_10_4Cp3"] = "The limited permissions granted above are perpetual and will " \ "not be revoked by the Internet Society or its successors or " \ "assign(ee)?s.$"; bp["rfc2026_10_4Cp4"] = "This document and the information contained herein is provided " \ "on an \"AS IS\" basis and THE INTERNET SOCIETY AND THE INTERNET " \ "ENGINEERING TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR " \ "IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE " \ "OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY " \ "IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A " \ "PARTICULAR PURPOSE.$"; bp["rfc2026_10_4D"] = "The IETF has been notified of intellectual property rights " \ "claimed in regard to some or all of the specification contained " \ "in this document. 
For more information consult the online list " \
        "of claimed rights.$";

    # Conformance claims.  The "lax" variants additionally accept a trailing
    # numbered reference such as "[1]" after the RFC number.
    bp["rfc2026_claim"] = "This document is an Internet-Draft and is in full conformance " \
        "with all provisions of Section 10 of \\[?RFC.?2026\\]?\\.";
    bp["rfc2026_lax_claim"] = "This document is an Internet-Draft and is in full conformance " \
        "with all provisions of Section 10 of \\[?RFC.?2026\\]?( ?\\[[0-9]+\\].*)?.";
    bp["rfc2026b_claim"] = "This document is an Internet-Draft and is subject to all provisions " \
        "of Section 10 of \\[?RFC.?2026\\]?\\.";
    bp["rfc2026b_lax_claim"] = "This document is an Internet-Draft and is subject to all provisions " \
        "of Section 10 of \\[?RFC.?2026\\]?( ?\\[[0-9]+\\].*)?.";
    bp["rfc3978_claim"] = "This document is an Internet-Draft and is subject to all provisions " \
        "of [Ss]ection 3 of \\[?RFC.?(3978|3667)\\]?\\.$";

    # 1id-guidelines paragraphs.  The "a" variants are looser spellings of
    # the same paragraph.
    bp["1id_guidelines_p1"] = "Internet-Drafts are working documents of the Internet Engineering " \
        "Task Force \\(IETF\\), its areas, and its working groups. Note that other " \
        "groups may also distribute working documents as Internet-Drafts.$";
    bp["1id_guidelines_p2"] = "Internet-Drafts are draft documents valid for a maximum of six months " \
        "and may be updated, replaced, or obsoleted by other documents at any " \
        "time. It is inappropriate to use Internet-Drafts as reference " \
        "material or to cite them other than as \"?work in progress\\.\"?$";
    bp["1id_guidelines_p2a"] = "Internet-Drafts are draft documents valid for a maximum of six months " \
        "and may be updated, replaced, or obsoleted by other documents at any " \
        "time. It is inappropriate to use Internet-Drafts as reference " \
        "material or to cite them other than as \"?work in progress\"?\\.$";
    bp["1id_guidelines_p3"] = "The list of current Internet-Drafts can be accessed at " \
        "http://www.ietf.org/1id-abstracts.html$";
    bp["1id_guidelines_p3a"] = "The list of current Internet-Drafts can be accessed at:? " \
        "http:// *www.ietf.org/ *(ietf/)?1id-abstracts.(txt|html) ?\\.?$";
    bp["1id_guidelines_p4"] = "The list of Internet-Draft Shadow Directories can be accessed at " \
        "http://www.ietf.org/shadow.html$";
    bp["1id_guidelines_p4a"] = "The list of Internet-Draft Shadow Directories can be accessed at:? " \
        "http:// *www.ietf.org/ *shadow.html ?\\.?$";
}

# ----------------------------------------------------------------------
# usage()
#
# Print the command line synopsis.  \047 is the octal escape for an
# apostrophe.
#
function usage() {
    print "" \
        "Usage: idnits [options] filename\n" \
        "\n" \
        " Options:\n" \
        " --version Print the version and exit\n" \
        " --help Print this text and exit\n" \
        " --nowarn Don\047t issue warnings, only ID-nits violations\n" \
        " --verbose Show more information about offending lines\n" \
        " --nitcount Show a count of nits\n" \
        " --debug Debug output, especially of boilerplate matching\n" \
        " --rfc2026 Report 2026 boilerplate deficiencies\n" \
        ""
}

# ----------------------------------------------------------------------
# strip(str)
#
# Return str with leading and trailing blanks, tabs, CRs and newlines
# removed.
#
function strip(str) {
    sub(/^[ \t\n\r]+/, "", str);
    sub(/[ \t\n\r]+$/, "", str);
    return str;
}

# ----------------------------------------------------------------------
# get_para()
#
# Starting with the current record, read input lines up to the next blank
# line (or end of input) and return them joined into one string.  A line
# ending in "-" is joined to the next one without a separating space.
# Consumes input through getline; sets the globals para and text.
#
function get_para() {
    para = strip($0);
    while (getline > 0 && (text = strip($0)) != "") {
        if (para ~ /-$/) {
            para = para text;
        } else {
            para = para " " text;
        }
    }
    return para;
}

# ----------------------------------------------------------------------
# match_para(para, name1 [, name2 [, name3 [, name4]]])
#
# Try to match para against the boilerplate pattern bp[name1]; if that
# fails, against bp[name1] followed by bp[name2], and so on, so that
# several boilerplate paragraphs run together in the document are still
# recognised.  On success, has[] is set to FNR for every name that took
# part in the match and 1 is returned; otherwise 0 is returned.
# para is always remembered in almost_boilerplate[name1] so that
# sectionerr()/showsection() can show the near miss.
#
function match_para(para, name1, name2, name3, name4,    i, j) {
    name[1] = name1;
    name[2] = name2;
    name[3] = name3;
    name[4] = name4;
    for (i = 1; i <= 4; i++) {
        pat[i] = bp[name[i]];
    }
    almost_boilerplate[name[1]] = para;
    gsub(/ +/, " ", para);
    pattern = "";
    for (i = 1; i <= 4; i++) {
        # Unused name slots and unknown names yield an empty pattern.
        if (!pat[i]) return 0;
        # Make the blank after a sentence-ending period optional.  The dot
        # must be escaped here: an unescaped dot rewrites every character
        # that precedes a blank, which both loosens the pattern and breaks
        # groups such as "(txt|html) ?".
        gsub(/\. /, ". ?", pat[i]);
        pattern = pattern pat[i];
        if (option_debug && option_pass2) {
            print "\n----",name[i],"----"
            print "line", FNR, "\n"
            print para
            print ""
            print pattern
            print ""
            if (para ~ pattern) {
                print "** Matches **"
            } else {
                print "** No Match **"
            }
        }
        if (para ~ pattern) {
            for (j = 1; j <= i; j++) has[name[j]] = FNR;
            return 1;
        }
        # Replace the end anchor by "one or more blanks" so that the next
        # paragraph pattern can be appended.
        sub(/ *\$?$/," +", pattern);
    }
    return 0
}

# ----------------------------------------------------------------------
# rindex(str, find)
#
# Return the 1-based position of the last occurrence of find in str, or 0
# if there is none.
#
function rindex(str, find, start, pos, incr, i) {
    start = 0;
    pos = 0;
    incr = length(find);
    while (1) {
        i = index(str, find);
        if ( i == 0 ) return pos;
        pos = start + i;
        str = substr(str, i+incr);
        start = start+i+incr-1;
    }
}

# ----------------------------------------------------------------------
# fold(str, indent, cols)
#
# This could have been done by piping through fold, too, but we'd have
# to postprocess to add indentation anyway, and this should be a lot
# quicker for short texts.
#
# Break str at blanks so that lines fit in cols columns; every inserted
# line break is followed by indent.  If a fragment has no blank, the break
# is made at the first blank after it.  Returns the folded string.
#
function fold(str, indent, cols, width,    pos, insert, frag, lpos) {
    pos = 1;
    insert = "\n" indent;
    width = cols - length(indent);
    while (pos+width-1 < length(str)) {
        frag = substr(str, pos, width);
        lpos = rindex(frag, " ");
        if (!lpos) {
            # No blank inside the window; look for the next one beyond it.
            frag = substr(str, pos+width);
            lpos = index(frag, " ");
            if (lpos) lpos += width;
        }
        if (lpos) {
            str = substr(str, 1, pos+lpos-2) insert substr(str, pos+lpos);
            pos = pos + lpos + length(insert);
        } else {
            return str;
        }
    }
    return str;
}

# ----------------------------------------------------------------------
# markdiff(p1, p2)
#
# Compare the expected text p1 with the found text p2 and return p2 with a
# marker line ("....^") inserted below the line on which the first
# difference occurs.  Runs of blanks after a period are shortened to one
# blank before comparing; spacefix compensates the marker position for
# the blanks removed on the marked line.
#
function markdiff(p1, p2, prefixlen, para2, suffix, leading, point, temp,    prefix, spacefix, marker, k) {
    prefixlen = 0;
    para2 = p2;
    # Two or more blanks after a period; a single blank needs no change and
    # must not be counted in spacefix below.
    gsub(/\.  +/, ". ", p1);
    gsub(/\.  +/, ". ", p2);
    do {
        if (substr(p1,1,1) == substr(p2,1,1)) {
            p1 = substr(p1,2);
            p2 = substr(p2,2);
        } else {
            break;
        }
        prefixlen++;
    } while (length(p1) && length(p2));
    prefix = substr(para2,1,prefixlen);
    suffix = substr(para2,prefixlen+1);
    leading = length(prefix) - rindex(prefix, "\n");
    temp = substr(prefix, length(prefix) - leading)
    spacefix = gsub(/\.  +/, ". ", temp);
    point = index(suffix, "\n");
    marker = "";
    for (k = leading + spacefix; k > 0; k--) marker = marker ".";
    marker = marker "^\n";
    return prefix substr(suffix, 1, point) marker substr(suffix, point+1)
}

# ----------------------------------------------------------------------
# showsection(name)
#
# Print the boilerplate text expected for name, with the regular
# expression syntax removed, and - if a near miss was recorded by
# match_para() - the text that was found, with the first difference
# marked.
#
function showsection(name) {
    p = bp[name];
    "date +'%Y'" | getline year
    close("date +'%Y'")

    # Get rid of regexp stuff in the boilerplate text we show the user:
    # - mark metacharacters with mark character "!"
    gsub(/[()|?+*^$]/, "!&", p);
    # - get rid of mark preceded by escape char...
    gsub(/\\!/, "", p);
    # - get rid of alternates, keeping the first only
    gsub(/!\|[^)]*!\)/, "", p);
    # - and we can get rid of metacharacters not preceded by escape char
    gsub(/![\(\)?+*^$]/, "", p);
    #   and also of escape chars themselves
    gsub(/\\/, "", p);
    # - also say "year" instead of "[0-9]"
    gsub(/\[0-9\]/, year, p);

    announce("(Expected a match on the following text:")
    p1 = fold(p, " ", bpcols);
    p1 = sprintf(" \"%s\"", p1);
    # Always print through "%s": the text may contain percent signs.
    printf("%s", p1)
    if (name in almost_boilerplate) {
        p2 = almost_boilerplate[name];
        p2 = fold(p2, " ", bpcols);
        p2 = sprintf (" \"%s\")\n\n\n", p2)
        p2 = markdiff(p1, p2);
        announce( "\n ... but found this:");
        printf("%s", p2)
    } else {
        print ")\n";
    }
}

# ----------------------------------------------------------------------
# sectionerr(name, str)
#
# Report that the boilerplate section described by str (table key name)
# is missing, and in verbose mode show what was expected.
#
function sectionerr(name, str) {
    # Parenthesised on purpose: "! name in has" parses as "(!name) in has".
    if (!(name in has)) {
        printf("?? internal error, expected a table match on %s\n", name);
        exit;
    }
    if (str ~ /^[aeiouyAEIOUYRH]/) {
        str = "The document seems to lack an " str;
    } else {
        str = "The document seems to lack a " str;
    }
    if (name in almost_boilerplate) {
        str = str " -- however, there\047s a paragraph with a matching beginning. Boilerplate error?";
    } else {
        str = str ". "
    }
    err(str);
    if (option_verbose && option_pass2) {
        showsection(name)
    }
}

# ----------------------------------------------------------------------
# announce(str)
#
# Print a blank line followed by str, folded to the output width.
#
function announce(str) {
    print ""
    print fold(" " str, " ", columns);
}

# ----------------------------------------------------------------------
# note(str)
#
# Print str, folded to the output width.
#
function note(str) {
    print fold(" " str, " ", columns);
}

# ----------------------------------------------------------------------
# warn(str)
#
# Print str as a warning and count it, unless warnings are disabled.
#
function warn(str) {
    if (option_warn) {
        print fold(" - " str, " ", columns);
        warnings++;
    }
}

# ----------------------------------------------------------------------
# err(str)
#
# Print str as an error (nit) and count it.
#
function err(str) {
    print fold(" * " str, " ", columns);
    errors++;
}

# ----------------------------------------------------------------------
# report(filename)
#
# Print the nits report for the file just read.  Does nothing for
# tombstone files or during pass 1.
#
function report(filename) {
    if (skip_file) return;
    if (option_pass1) return;
    if (got_input) {
        is_rfc = (filename ~ /[Rr][Ff][Cc][0-9]+\.txt$/);
        {
            announce("Checking nits according to http://www.ietf.org/ID-Checklist.html:")
            warncount = warnings;
            errcount = errors;
            if (!has_abstract) {
                err("The document seems to lack an Abstract section.");
            }
            if (!has_intro) {
                err("The document seems to lack an Introduction section.");
            }
            if (!has_security) {
                err("The document seems to lack a Security Considerations section.");
            }
            if (!has_iana) {
                err("The document seems to lack an IANA Considerations section.");
            }
            if (!has_addr) {
                err("The document seems to lack an Authors\047 Addresses Section.");
            }
            if (has_refs && !
has_splitrefs) {
                err("The document seems to lack separate sections for Informative/Normative References.");
            }

            # Optional listing of every boilerplate paragraph that matched.
            if (option_list_matches) {
                print fold(" Boilerplate matches:", " ", columns)
                for ( n in has ) {
                    # Merely referencing has[x] creates an empty entry, so
                    # list only the names that actually recorded a line.
                    if (n && has[n]) note(sprintf("Found %s on line %d", n, has[n]))
                }
                print "";
            }

            if ( ( has["rfc3667_5_1"] || has["rfc3978_5_1a"] || has["rfc3978_5_1"] ||
                   has["rfc3978_5_2b"] || has["rfc3978_5_3"] || has["rfc3978_5_5"] ||
                   has["rfc3979_5p1"] || has["rfc3979_5p2"] || has["rfc3979_5p3"] ) ) {
                # The document uses at least some RFC 3978/3979 boilerplate.
                note("Checking conformance with RFC 3978/3979 boilerplate...");
                non_bp_errs = errors;
                if ( has["rfc2026_lax_claim"] || has["rfc2026b_lax_claim"] ) {
                    err( "The document claims conformance with section 10 of RFC 2026, but uses " \
                         "some RFC 3978/3979 boilerplate. As RFC 3978/3979 replaces section 10 of " \
                         "RFC 2026, you should not claim conformance with it if you have changed " \
                         "to using RFC 3978/3979 boilerplate.");
                }
                if (!(has["rfc3978_5_1"]) && !is_rfc ) {
                    sectionerr("rfc3978_5_1", "RFC 3978 Section 5.1 IPR Disclosure Acknowledgement");
                }
                if ( has["rfc3978_5_2b"]) {
                    err("The document has an RFC 3978 Section 5.2(b) Derivative Works Limitation clause.");
                }
                if ( has["rfc3978_5_3"]) {
                    err("The document has an RFC 3978 Section 5.3 Publication Limitation clause .");
                }
                if (!has["rfc3978_5_4p1"]) {
                    sectionerr("rfc3978_5_4p1", "RFC 3978 Section 5.4 Copyright Line");
                }
                if (!has["rfc3978_5_4p2"]) {
                    sectionerr("rfc3978_5_4p2", "RFC 3978 Section 5.4 Reference to BCP 78");
                }
                if (!has["rfc3978_5_5"]) {
                    sectionerr("rfc3978_5_5", "RFC 3978 Section 5.5 Disclaimer");
                }
                if (! has["rfc3979_5p1"]) {
                    sectionerr("rfc3979_5p1", "RFC 3979 Section 5, para 1 IPR Disclosure Acknowledgement");
                    if (has["rfc2026_10_4A"]) {
                        note("( - It does however have an RFC 2026 Section 10.4(A) Disclaimer.)");
                    }
                }
                if (! has["rfc3979_5p2"]) {
                    sectionerr("rfc3979_5p2", "RFC 3979 Section 5, para 2 IPR Disclosure Acknowledgement");
                }
                if (! has["rfc3979_5p3"]) {
                    sectionerr("rfc3979_5p3", "RFC 3979 Section 5, para 3 IPR Disclosure Invitation");
                    if (has["rfc2026_10_4B"]) {
                        note("( - It does however have an RFC 2026 Section 10.4(B) IPR Disclosure Invitation.)");
                    }
                }
                if ( errors == non_bp_errs ) {
                    note("the boilerplate looks good.");
                }
                if ( (has["rfc3667_5_1"] || has["rfc3978_5_1a"]) && !has["rfc3978_5_1"]) {
                    note("(The document uses RFC 3667 boilerplate or RFC 3978-like " \
                         "boilerplate instead of verbatim RFC 3978 boilerplate. After 6 May 2005, " \
                         "submission of drafts without verbatim RFC 3978 boilerplate is not " \
                         "accepted.)");
                }
            } else if (( has["rfc2026_lax_claim"] || has["rfc2026b_lax_claim"] ||
                         has["rfc2026_10_4Cp2"] || has["rfc2026_10_4Cp3"] ||
                         has["rfc2026_10_4Cp4"] ) ) {
                # Only RFC 2026 style boilerplate was found.
                if (option_verbose) {
                    print "";
                    if (has["rfc2026_lax_claim"] || has["rfc2026b_lax_claim"]) note(" [Claims RFC 2026 conformance...]")
                    if (has["rfc2026_10_4Cp1"]) note(" [Has RFC 2026 Sec. 10.4 para. 1...]")
                    if (has["rfc2026_10_4Cp2"]) note(" [Has RFC 2026 Sec. 10.4 para. 2...]")
                    if (has["rfc2026_10_4Cp3"]) note(" [Has RFC 2026 Sec. 10.4 para. 3...]")
                    if (has["rfc2026_10_4Cp4"]) note(" [Has RFC 2026 Sec. 10.4 para. 4...]")
                    print "";
                }
                err("Looks like you\047re using RFC 2026 boilerplate. Better change to RFC 3978/3979.");
                # NOTE(review): the rest of this function and the usage text
                # call this switch --rfc2026 / option_rfc2026; option_2026 is
                # presumably a leftover spelling, so either one is accepted.
                if ( option_2026 || option_rfc2026 ) {
                    if ( ref_in_heading_nit) {
                        err("The document seems to add a numbered reference to RFC 2026 to the boilerplate.");
                    }
                    if (!has["rfc2026_10_4Cp1"]) {
                        sectionerr("rfc2026_10_4Cp1", "RFC 2026 Section 10.4(C) Copyright Notice");
                    }
                    if (!has["rfc2026_10_4Cp2"] || !has["rfc2026_10_4Cp3"]) {
                        sectionerr("rfc2026_10_4Cp2", "RFC 2026 Section 10.4(C) Permission Grants Notice");
                    }
                    if (!has["rfc2026_10_4Cp4"]) {
                        sectionerr("rfc2026_10_4Cp4", "RFC 2026 Section 10.4(C) Disclaimer");
                    }
                }
            } else {
                # No boilerplate at all: err() adds one more, eight in total.
                errors += 7;
                err("Cannot find any IPR, Copyright or other required boilerplate in this document.");
            }

            # According to the nits page we should be comparing to 15 here, but
            # that may be without the boilerplate - and practice seems to permit
            # also 16 pages including boilerplate, so...
            if (!has_toc && ((pagecount > 16) || (FNR > 50*16))) {
                err("The document is more than 15 pages and seems to lack a Table of Contents.");
            }

            # The per-line counters are only summarised in non-verbose mode.
            if (spacing > 50) {
                if (!option_verbose)
                    err(sprintf("There %s %d instance%s of weird spacing in the document. " \
                                "Is it really formatted ragged-right, rather than justified?",
                                (spacing==1?"is":"are"), spacing, (spacing==1?"":"s") ));
            }
            if (longlines) {
                if (!option_verbose)
                    err(sprintf("There %s %d instance%s of too long lines in the document, " \
                                "the longest one being %d character%s in excess of 72.",
                                (longlines==1?"is":"are"), longlines, (longlines==1?"":"s"),
                                excesslength, ((excesslength > 1) ? "s" : "")));
            }
            if (nonascii) {
                if (!option_verbose)
                    err(sprintf("There %s %d instance%s of lines with non-ascii characters in the document.",
                                (nonascii==1?"is":"are"), nonascii, (nonascii==1?"":"s") ));
            }
            if (controlchars) {
                if (!option_verbose)
                    err(sprintf("There %s %d instance%s of lines with control characters in the document.",
                                (controlchars==1?"is":"are"), controlchars, (controlchars==1?"":"s") ));
            }
            if (warncount == warnings && errcount == errors) note("No nits found.");
        }

        if ( !is_rfc ) {
            announce("Checking nits according to http://www.ietf.org/ietf/1id-guidelines.txt:");
            warncount = warnings;
            errcount = errors;
            if (!has["1id_guidelines_p1"]) {
                sectionerr("1id_guidelines_p1", "1id_guidelines paragraph about Internet-Drafts being working documents");
            }
            if (!(has["1id_guidelines_p2"] || has["1id_guidelines_p2a"])) {
                sectionerr("1id_guidelines_p2", "1id_guidelines paragraph about 6 months document validity");
            }
            if (!(has["1id_guidelines_p3"] || has["1id_guidelines_p3a"])) {
                sectionerr("1id_guidelines_p3", "1id_guidelines paragraph about the list of current Internet-Drafts");
            }
            if (!(has["1id_guidelines_p4"] || has["1id_guidelines_p4a"])) {
                sectionerr("1id_guidelines_p4", "1id_guidelines paragraph about the list of Shadow Directories");
            }
            if ( maxpagelength > 58 ) {
                warn(sprintf("The page length should not exceed 58 lines per page, " \
                             "but there was %d longer page%s, the longest (page %d) being %d lines",
                             longpagecount, (longpagecount==1?"":"s"), longestpage, maxpagelength));
            }
            if ( pagecount > formfeedcount+1 ) {
                warn(sprintf("It seems as if not all pages are separated by form feeds - " \
                             "found %d form feeds but %d page%s",
                             formfeedcount, pagecount, (pagecount==1?"":"s")));
            }
            if (warncount == warnings && errcount == errors)
                note("Nothing found here (but these checks do not cover all of 1id-guidelines.txt yet).");
        }

        {
            announce("Miscellaneous warnings:")
            warncount = warnings;
            errcount = errors;
            if ( ( has["rfc2026_lax_claim"] || has["rfc2026b_lax_claim"] ) &&
                 ( has["rfc2026_10_4Cp2"] || has["rfc2026_10_4Cp3"] || has["rfc2026_10_4Cp4"] ) ) {
                if (option_rfc2026) {
                    if (!has["rfc2026_10_4A"] ) {
                        warn("The document seems to lack an RFC 2026 Section 10.4(A) Disclaimer.");
                    }
                    if (!has["rfc2026_10_4B"] ) {
                        warn("The document seems to lack an RFC 2026 Section 10.4(B) IPR Disclosure Invitation.");
                    }
                    if ( has["rfc2026_10_4D"]) {
                        warn("The document has an RFC 2026 Section 10.4(D) IPR Notice.");
                    }
                }
            }
            if ( has["rfc3978_5_2a"]) {
                warn("The document has an RFC 3978 Section 5.2(a) Derivative Works Limitation clause.");
            }
            if (!option_verbose && hyphens && !is_rfc) {
                warn(sprintf("There %s %d instance%s of lines with hyphenated line breaks in the document.",
                             (hyphens==1?"is":"are"), hyphens, (hyphens==1?"":"s")));
            }
            if ( misspelled_toc ) {
                warn("The \"Table of Contents\" section title seems to be misspelled.");
            }
            if ( misspelled_addr ) {
                warn("The \"Author\047s Address\" (or \"Authors\047 Addresses\") section title is misspelled.");
            }
            if (!option_verbose && spacing) {
                # Show the first five offending lines, then just a count.
                for (i=1; i<=5; i++) {
                    if (i in line)
                        warn(sprintf("Line %d has weird spacing: \047...%s...\047", linenum[i], extract[i]));
                }
                if (spacing > 5)
                    warn(sprintf("(%d more instance%s...)",spacing-5, (spacing-5==1?"":"s")));
            }
            if (warncount == warnings && errcount == errors) note("None.");
        }

        # --------------------------------------------------------------
        # Summary
        if (errors == 0) {
            print "";
            note("No nits found.");
        } else {
            if (option_nitcount) {
                print "";
                note(sprintf("Summary: %d nit%s, %d warning%s",
                             errors, (errors==1?"":"s"), warnings, (warnings==1?"":"s")));
            }
            if (!
option_verbose) {
                print "";
                note("Run idnits with the --verbose option for more detailed information.");
            }
        }
    }
}

# ----------------------------------------------------------------------
# Pattern / actions:
#

# Tombstone files: a placeholder left behind for a deleted, replaced or
# published draft.  Such files are skipped entirely.
/^This Internet-Draft has been deleted./ {
    if (option_pass2) printf " Skipping this file; it looks like a tombstone file to me.\n";
    skip_file = 1;
}
/^This +document has been replaced by/ {
    if (option_pass2) printf " Skipping this file; it looks like a tombstone file to me.\n";
    skip_file = 1;
}
/^This Internet-Draft was published as/ {
    if (option_pass2) printf " Skipping this file; it looks like a tombstone file to me.\n";
    skip_file = 1;
}
/^A new Request for Comments is now available/ {
    if (option_pass2) printf " Skipping this file; it looks like a tombstone file to me.\n";
    skip_file = 1;
}
/^RFC [0-9]+ was never issued./ {
    if (option_pass2) printf " Skipping this file; it looks like a tombstone file to me.\n";
    skip_file = 1;
}

# Per-line formatting checks, run on every input line.
/.*/ {
    got_input = 1;
    gsub(/\r/,"");
    sub(/[\n\t ]+$/, "");

    # 1.1.a Max column 72
    if (length($0) > 72 && ! skip_file) {
        if (option_verbose && option_pass1) {
            printf("%s(%d): Line is too long: the offending characters are \047%s\047\n", FILENAME, FNR, substr($0,73));
        }
        longlines++;
        if (length($0) - 72 > excesslength) {
            excesslength = length($0) - 72;
        }
    }

    # 1.1.b Ragged right
    # NOTE(review): the blanks inside the last regexp are significant.  With
    # a single blank it matches any three adjacent letters; it is written
    # here as two required plus two optional blanks between words, which
    # is an assumption - confirm against a pristine copy of the script.
    if (! skip_file &&
        ! match($0, /^ *Internet.Draft/) &&
        ! match($0, /^ *INTERNET.DRAFT/) &&
        match($0, /[A-Za-z][a-z]   ? ?[a-z]/) ) {
        if (option_verbose && option_pass1 ) {
            printf("%s(%d): Line has weird spacing: \047...%s...\047\n", FILENAME, FNR, substr($0, RSTART-5, 14));
        }
        spacing++;
        line[spacing] = $0;
        linenum[spacing] = FNR;
        extract[spacing] = substr($0, RSTART-5, 14)
    }

    # 1.1.c No hyphenation for line-breaks
    if ( $0 ~ /[A-Za-z0-9_]-$/ && ! skip_file && ! has_refs && has_hyphenlist ) {
        # Isolate the last word fragment on the line, trailing hyphen included.
        word = $0
        sub(/^.*[^A-Za-z0-9_-]/, "", word)
        # Parenthesised on purpose: "! word in x" parses as "(!word) in x",
        # which made this test always false.
        if ( !(word in hyphenfrags) ) {
            if (option_verbose && option_pass1) {
                printf("%s(%d): Line seems to end with a hyphenated word.\n", FILENAME, FNR)
                if (option_verbose > 1 && option_pass1) {
                    printf(" --> %s\n", $0);
                }
            }
            hyphens++
        }
    }

    # 1.1.e ASCII-only
    # The test really should go from \200 to \377 - but the awk implementation
    # on *BSD seems to have used \377 internally as a sentinel or something...
    if (match($0, "[\200-\376]") && ! skip_file) {
        if (option_verbose && option_pass1) {
            printf("%s(%d): Line contains a non-ascii character (%s) in position %d.\n", FILENAME, FNR, substr($0, RSTART,1), RSTART);
            if (option_verbose > 1 && option_pass1) {
                printf(" --> %s\n", $0);
                printf(" %*s\n", RSTART, "^");
            }
        }
        nonascii++;
    }

    # 1.1.e no control character except CR NL FF
    # nawk can't handle octal escapes inside character classes, it seems. Sigh.
    #if (match($0, /[\001-\011\013\016-\037]/) && ! skip_file) {
    if (match($0, /(\001|\002|\003|\004|\005|\006|\007|\010|\011|\013|\016|\017|\020|\021|\022|\023|\024|\025|\026|\027|\030|\031|\032|\033|\034|\035|\036|\037)/) && ! skip_file) {
        if (option_verbose && option_pass1) {
            printf("%s(%d): Line contains control character %s in position %d.\n", FILENAME, FNR, controlchar[substr($0, RSTART, 1)], RSTART);
            if (option_verbose > 1 && option_pass1) {
                printf(" --> %s\n", $0);
                printf(" %*s\n", RSTART, "^");
            }
        }
        controlchars++;
    }

    # 1.1.f Do not number the Abstract section
    if ( $0 ~ /^[ \t]*Abstract[ \t]*$/ ) {
        abstract_seen = 1;
    }
    if ( $0 ~ /^[ \t]*[0-9][.0-9 \t]*Abstract[ \t]*$/ && ( ! abstract_seen ) && ! skip_file && option_pass2) {
        printf(" Abstract section seems to be numbered\n");
        errors++;
    }

    # 1.1.f Do not number the "Status of Memo" section
    if ( $0 ~ /^[ \t]*Status of Memo[ \t]*$/ ) {
        status_of_memo_seen = 1;
    }
    if ( $0 ~ /^[ \t]*[0-9][.0-9 \t]*Status of Memo[ \t]*$/ && ( ! status_of_memo_seen ) && ! skip_file && option_pass2) {
        printf(" Status of Memo section seems to be numbered\n");
        errors++;
    }
}

# Counter lines of the form "-+- Name: N -+-"; the value is field 3.
# Pagecount
/-\+- Pagecount: [0-9]+ -\+-/     { pagecount = $3+0; next }
/-\+- Maxpagelength: [0-9]+ -\+-/ { maxpagelength = $3+0; next }
/-\+- Longestpage: [0-9]+ -\+-/   { longestpage = $3+0; next }
/-\+- Longpagecount: [0-9]+ -\+-/ { longpagecount = $3+0; next }
/-\+- Formfeedcount: [0-9]+ -\+-/ { formfeedcount = $3+0; next }
/-\+- Indentation: [0-9]+ -\+-/ {
    indentation = $3+0;
    # A string of "indentation" blanks.  The former substr(1, indentation, " ")
    # had its arguments scrambled and always produced "".
    indentstring = sprintf("%*s", indentation, "");
    next
}

# Check for required sections
# Nawk doesn't understand IGNORECASE = 0, so we have to explicitly convert.
# Also normalise the indentation
{
    normalized = tolower($0);
    if (indentation) normalized = substr(normalized, indentation+1);
}

normalized ~ /^[0123.]* *abstract$/ { has_abstract = 1; }
normalized ~ /^[0123.]* *table of contents:?$/ { has_toc = 1; in_toc = 1; }
normalized ~ /^[ \t]*contents$/ { if (!has_intro) has_toc = 1; in_toc = 1; }
# A dotted leader followed by a page number also counts as a TOC line.
normalized ~ /\. ?\. ?\. ?\. ?\. ?\. ?\. ?\. ?\. ?\. ?[0-9]+$/ { if (!has_intro) has_toc = 1; in_toc = 1; }
normalized ~ /^[0123.]* *introduction[ \t]*.*/ { has_intro = 1; }
normalized ~ /^[0123.]* *overview[ \t]*.*/ { has_intro = 1; }
normalized ~ /^[0123.]* *rationale[ \t]*.*/ { has_intro = 1; }
normalized ~ /^[0123.]* *scope[ \t]*.*/ { has_intro = 1; }
normalized ~ /^[0123.]* *(historical )?background[ \t]*.*/ { has_intro = 1; }
normalized ~ /^[0-9.]* *security considerations?[ \t]*.*/ { has_security = 1; }
normalized ~ /^[0-9.]* *security$/ { has_security = 1; }
normalized ~ /^[0-9a-z.]* *iana considerations?[ \t]*.*/ { has_iana = 1; }
normalized ~ /^[0-9a-z.]* *references$/ { has_refs = 1; }
normalized ~ /^[0-9a-z.]* *normative references?$/ { has_splitrefs = 1; }
normalized ~ /^[0-9a-z.]* *informative references?$/ { has_splitrefs = 1; }
normalized ~ /^[0-9a-z.]* *references[^a-z]+normative?$/ { has_splitrefs = 1; }
normalized ~ /^[0-9a-z.]* *references[^a-z]+informative?$/ { has_splitrefs = 1; }
normalized ~ /^[0-9a-z.]* *normative$/ { if (has_refs) has_splitrefs = 1; }
normalized ~ /^[0-9a-z.]* *informative$/ { if (has_refs) has_splitrefs = 1; }
normalized ~ /^[0-9a-z.]* *(author|editor)(\047s|s\047) +address(es)?/{ has_addr = 1; }
normalized ~ /^[0-9a-z.]* *author information$/ { has_addr = 1; }
normalized ~ /^[0-9a-z.]* *(author|editor)(\047s|s\047) contact information$/ { has_addr = 1; }
# normalized is lower case, so the section label class must be a-z here too.
normalized ~ /^[0-9a-z.]* *contact information$/ { has_addr = 1; }

# RFC 3978 / 2026 compliance
# Each rule keys on the opening words of a boilerplate paragraph, reads the
# whole paragraph and lets match_para() record what it matches in has[].
/By submitting this Internet-Draft/ {
    para = get_para()
    match_para(para, "rfc3667_5_1")
    match_para(para, "rfc3978_5_1");
    match_para(para, "rfc3978_5_1a")
}
/This document may not be modified/ {
    para = get_para()
    match_para(para, "rfc3978_5_2a") || match_para(para, "rfc3978_5_2ax");
    match_para(para, "rfc3978_5_2b") || match_para(para, "rfc3978_5_2bx");
}
/^[ \t0-9.]*This document may only be posted in an Internet-Draft.$/ {
    has["rfc3978_5_3"] = FNR;
}
/Copyright ?\([Cc]\) ?The Internet Society/ {
    para = get_para()
match_para(para, "rfc3978_5_4p1", "rfc3978_5_4p2");
    match_para(para, "rfc2026_10_4Cp1", "rfc2026_10_4Cp2", "rfc2026_10_4Cp3", "rfc2026_10_4Cp4");
}
/This document is subject to the rights/ {
    para = get_para()
    match_para(para, "rfc3978_5_4p2");
}
/This document and the information/ {
    para = get_para()
    match_para(para, "rfc3978_5_5");
    match_para(para, "rfc2026_10_4Cp4");
}
/The IETF takes no position regarding/ {
    para = get_para()
    match_para(para, "rfc3979_5p1", "rfc3979_5p2", "rfc3979_5p3");
    match_para(para, "rfc2026_10_4A");
}
/Copies of IPR disclosures made to the/ {
    para = get_para()
    match_para(para, "rfc3979_5p2", "rfc3979_5p3");
}
/The IETF invites any interested party/ {
    para = get_para()
    match_para(para, "rfc3979_5p3");
    match_para(para, "rfc2026_10_4B");
}
/This document and translations of it/ {
    para = get_para()
    match_para(para, "rfc2026_10_4Cp2", "rfc2026_10_4Cp3", "rfc2026_10_4Cp4");
}
/The limited permissions granted above/ {
    para = get_para()
    match_para(para, "rfc2026_10_4Cp3", "rfc2026_10_4Cp4");
}
/The IETF has been notified of intellectual/ {
    para = get_para()
    match_para(para, "rfc2026_10_4D");
}
/This document is an Internet-Draft and is/ {
    para = get_para()
    match_para(para, "rfc2026_lax_claim");
    match_para(para, "rfc2026b_lax_claim");
    # A lax claim matched but the strict one does not: the document has
    # added something (a numbered reference) to the claim.  Both lax
    # variants are tested; the second operand used to repeat the first.
    ref_in_heading_nit = ( has["rfc2026_lax_claim"] || has["rfc2026b_lax_claim"] ) &&
                         ! match_para(para, "rfc2026_claim") &&
                         ! match_para(para, "rfc2026b_claim");
    match_para(para, "rfc3978_claim", "rfc3667_5_1" );
    match_para(para, "rfc3978_claim", "rfc3978_5_1");
    match_para(para, "rfc3978_claim", "rfc3978_5_1a");
}

#/^[ \t0-9.]*Table Of [Cc]ontents?:?$/ { has_toc = 1; misspelled_toc = 1}
# Address section titles with a missing or misplaced apostrophe.
/^[ \t0-9.]*(Author|Editor)([^\047]s|s[^\047]) [Aa]ddress(es)?$/ { has_addr = 1; misspelled_addr = 1; }
/^[ \t0-9.]*(Authors?|Editors?) +[Aa]ddress(es)?$/ { has_addr = 1; misspelled_addr = 1; }

# 1id_guidelines.html compliance
/Internet-Drafts are working documents of/ {
    para = get_para()
    match_para(para, "1id_guidelines_p1", "1id_guidelines_p2", "1id_guidelines_p3a", "1id_guidelines_p4a") ||
        match_para(para, "1id_guidelines_p1", "1id_guidelines_p2a", "1id_guidelines_p3a", "1id_guidelines_p4a");
}
/Internet-Drafts are draft documents valid for/ {
    para = get_para()
    match_para(para, "1id_guidelines_p2", "1id_guidelines_p3a", "1id_guidelines_p4a") ||
        match_para(para, "1id_guidelines_p2a", "1id_guidelines_p3a", "1id_guidelines_p4a");
}
/The list of current Internet-Drafts can be/ {
    para = get_para()
    match_para(para, "1id_guidelines_p3", "1id_guidelines_p4a") ||
        match_para(para, "1id_guidelines_p3a", "1id_guidelines_p4a");
}
/The list of Internet-Draft Shadow Directories/ {
    para = get_para()
    match_para(para, "1id_guidelines_p4") ||
        match_para(para, "1id_guidelines_p4a");
}

END {
    report(option_filename);
    if (errors || longlines || hyphens || spacing || nonascii || controlchars ) print "";
    # NOTE(review): total_errors is not assigned anywhere in this part of the
    # program; confirm it is maintained elsewhere, otherwise the exit status
    # is always 0.
    if (total_errors < 256) {
        exit total_errors;
    } else {
        exit 255;
    }
}
EOF
    # $AWK is deliberately unquoted in case it carries extra arguments.
    $AWK -f "$program" "$@"
    if [ ! "$optkeep" = "1" ]; then
        rm -f -- "$program"
    fi
}

# Print the long help text.  A quoted here-document is used so that
# quotes and apostrophes can be written literally; echo does not interpret
# octal escapes such as \047.
helpmsg() {
    cat <<'HELPTEXT'

Usage: idnits [options] filename

  Options:
    --version   Print the version and exit
    --help      Print this text and exit
    --nowarn    Don't issue warnings, only ID-nits violations
    --verbose   Show more information about offending lines
    --nitcount  Show a count of nits
    --debug     Debug output, especially of boilerplate matching
    --rfc2026   Report 2026 boilerplate deficiencies

'idnits' looks for violations of Section 2.1 and 2.2 of the
requirements listed on http://www.ietf.org/ID-Checklist.html

idnits works on Linux, OS-X, Windows under Cygwin, on *BSD and may work
on Solaris. Testing on *BSD and Solaris has been minimal, though.

To install, simply download the script, place it in your path and make
it executable. idnits uses awk and sh internally.

Many, but not all ID-nits are checked; here's the list:

  1.1 Formatting

    yes   * Not beyond the 72nd column of a line
            This is especially important for diagrams and code, which
            the RFC Editor may not be able to trivially reformat to
            fall within the margins.
    yes   * Must be ragged right
          * No hyphenation for line-breaks
          * No footnotes
    yes   * ASCII-only, no control characters (other than CR, NL & FF)
    yes   * Do not number the "Status of Memo" or Abstract sections
    yes@  * Do not add a numbered reference in the ID boilerplate to
            RFC 3978 or 3979 (makes it harder for the RFC editor to
            process the document when they strip off the ID boilerplate)
          * Reasonably well formatted for readability and clarity.
          * Use network byte order in diagrams
            (see draft-rfc-editor-rfc2223bis-07.txt section 3.4)

  1.2 Required sections - all IDs

    yes@  * Internet Draft boilerplate
            Must contain boilerplate that permits publication as an RFC
            (see RFC3978, Section 5.2.)
          * List of authors/editors
            There should not be > 5 authors/editors
            (see http://www.rfc-editor.org/policy.html)
    yes   * Abstract
    yes   * Table of Contents, required if document is more than 15 pages
    yes   * Introduction
    yes   * Security Considerations
    yes   * IANA Considerations
    yes   * References
            Must be split into normative and informative sections
            (see http://www.rfc-editor.org/policy.html)
    yes   * Author's Address
    yes   * IPR notices, IPR Notice, verbatim from RFC3979, Section 5.
    yes   * Copyright Notice and Disclaimer, verbatim from RFC3978,
            Sections 5.4 and 5.5.

------------------------------------------------------------------------

  @: Interpreted in light of the new requirements introduced by RFC 3667
     (replaced by 3978) and RFC 3668 (replaced by 3979):

    yes   * IPR Disclosure Acknowledgement per RFC 3978 Section 5.1
    yes   * No Derivative Works Limitation clause per RFC 3978 Section 5.2(b)
    yes   * No Publication Limitation clause per RFC 3978 Section 5.3
    yes   * Copyright Notice per RFC 3978 Section 5.4
    yes   * Disclaimer per RFC 3978 Section 5.5
    yes   * Disclaimer of IPR validity per RFC 3979 Section 5

HELPTEXT
}

# ----------------------------------------------------------------------
# Command line handling.  Switches handled here: --version, --help,
# --verbose, --keep, --width N and --.  Any other switch is passed on to
# the awk program through the CHECKNITS environment variable.
while [ $# -gt 0 ]; do
    case "$1" in
        --version)
            echo "idnits $version"
            exit
            ;;
        --help)
            helpmsg
            exit
            ;;
        --verbose)
            optverbose="$optverbose $1"
            options="$options $1"
            ;;
        --keep)
            optkeep=1
            ;;
        --width)
            optwidth=$2
            shift
            ;;
        --)
            shift
            files="$files $*"
            break
            ;;
        -*)
            options="$options $1"
            ;;
        *)
            files="$files $1"
            ;;
    esac
    shift
done

echo "idnits $version $progdate"

# Output width: --width wins, then COLUMNS from the environment, then the
# terminal size if it can be determined.
if [ "$optwidth" ]; then
    COLUMNS=$optwidth
elif [ -z "$COLUMNS" ]; then
    SIZE=$(stty size 2>/dev/null)
    if [ "$SIZE" ]; then
        # Deliberately unquoted: split "rows cols" into $1 and $2.
        set -- $SIZE
        LINES=$1
        COLUMNS=$2
    fi
fi

export CHECKNITS COLUMNS

if [ -z "$files" ]; then
    # No file arguments: check standard input.  The assignment must be a
    # statement of its own; as a prefix to "strip | checknits" it would only
    # apply to strip and checknits would never see the options.
    CHECKNITS=$options
    strip | checknits
else
    # $files is a blank-separated list, so it is expanded unquoted; file
    # names containing blanks are not supported.
    for file in $files; do
        echo ""
        echo "$file:"
        echo ""
        if [ "$optverbose" ]; then
            # run a first pass without stripping, to get correct line
            # numbers in the verbose reports
            (CHECKNITS="$optverbose --pass1" checknits "$file")
        fi
        CHECKNITS="$options --filename $file"
        if [ "$optkeep" = "1" ]; then
            strip "$file" | tee "/tmp/idnits-$$-stripped-$(basename "$file")" | checknits
        else
            strip "$file" | checknits
        fi
    done
fi