% This change extends patgen's pattern generation algorithm to deal with up to % 10 different hyphen classes. The new algorithm has one new integer % parameter, the number of hyphen classes (between 2 and 10). This parameter % can be specified in columns 8 and 9 of the first line of the translate % file. patgen will now produce values up to 63. @x l.54 @d banner=='This is PATGEN, Version 2.3' {printed when the program starts} @y @d banner=='This is PATGEN, Version 2.3 (with multiple hyphen classes)' {printed when the program starts} @z @x l.588 @!max_val=10; {maximum number of levels$+1$, also used to denote bad patterns} @y @!max_val=64; {maximum number of levels$+1$, also used to denote bad patterns} @z @x l.603 if max_val>10 then bad:=5; @y if max_val>100 then bad:=5; @z @x l.1107 print_ln('left_hyphen_min = ',left_hyphen_min:1, ', right_hyphen_min = ',right_hyphen_min:1, ', ',imax-edge_of_word:1,' letters'); @y print_ln('left_hyphen_min = ',left_hyphen_min:1, ', right_hyphen_min = ',right_hyphen_min:1, ', hyphen_class_num = ',hyphen_class_num:1, ', ',imax-edge_of_word:1,' letters'); @z @x l.1113 @ @= @!imax: internal_code; {largest |internal_code| assigned so far} @!left_hyphen_min, @!right_hyphen_min: dot_type; @ @= begin left_hyphen_min:=2; right_hyphen_min:=3; @y @ @d hyphen_class(#)==(# mod hyphen_class_num) @= @!imax: internal_code; {largest |internal_code| assigned so far} @!left_hyphen_min, @!right_hyphen_min: dot_type; @!hyphen_class_num: 2..10; { allow up to 10 hyphen classes, default is 2 } @ @= begin left_hyphen_min:=2; right_hyphen_min:=3; hyphen_class_num:=2; @z @x If the values specified for \.{\\lefthyphenmin} and \.{\\righthyphenmin} are invalid (e.g., blank) new values are read from the terminal. @y Another addition is that columns 8 and~9 may optionally contain a value for \.{\\hyphenclassnum}. 
If the values specified for \.{\\lefthyphenmin}, \.{\\righthyphenmin} and \.{\\hyphenclassnum} are invalid (e.g., blank) new values are read from the terminal. @z @x l.1148 if (n>=1)and(n=1)and(n=2)and(n<=10) then hyphen_class_num:=n@+ else bad:=true; @z @x l.1358 @!more_to_come: boolean; @y @!more_to_come: boolean; @!off_count: array [1..9] of integer; { off by $<-3$, $-3$, $-2$, $-1$, $0$, $1$, $2$, $3$, $>3$ } @z @x l.1480 @ @= begin for d:=0 to pat_len do hval[d]:=0; repeat d:=hyf_dot(h); if hval[d]0 then write(patout,xdig[hval[0]]); for d:=1 to pat_len do begin write_letter(pat[d])(patout); write(patout,xext[pat[d]]); if hval[d]>0 then write(patout,xdig[hval[d]]); end; write_ln(patout); end @y @ Since we have increased |max_val|, we must allow for two-digit values in patterns. @= begin for d:=0 to pat_len do hval[d]:=0; repeat d:=hyf_dot(h); if hval[d]9 then write(patout,xdig[hval[0] div 10]); if hval[0]>0 then write(patout,xdig[hval[0] mod 10]); for d:=1 to pat_len do begin write_letter(pat[d])(patout); write(patout,xext[pat[d]]); if hval[d]>9 then write(patout,xdig[hval[d] div 10]); if hval[d]>0 then write(patout,xdig[hval[d] mod 10]); end; write_ln(patout); end @z @x l.1502 @!dots: array[word_index] of hyf_type; {current hyphens} @y @!hclass: array[word_index] of digit; {current wanted hyphen classes} @z @x applied to all following words (until the next global word weight). A digit at some intercharacter position indicates a weight for that position only. The |read_word| procedure scans a line of input representing a word, and places the letters into the array |word|, with |word[1]=word[wlen]= edge_of_word|. The dot appearing between |word[dpos]| and |word[dpos+1]| is placed in |dots[dpos]|, and the corresponding dot weight in |dotw[dpos]|. @y applied to all following words (until the next global word weight). A digit at the beginning of some intercharacter position indicates a weight for that position only. 
A digit at the end of some intercharacter position indicates the hyphen class for the hyphen wanted at this position. The |read_word| procedure scans a line of input representing a word, and places the letters into the array |word|, with |word[1]=word[wlen]= edge_of_word|. The class of the hyphen appearing between |word[dpos]| and |word[dpos+1]| is placed in |hclass[dpos]|, and the corresponding dot weight in |dotw[dpos]|. @z @x l.1537 @p procedure read_word; label done, found; var c: text_char; @!t: trie_pointer; begin read_buf(dictionary); word[1]:=edge_of_word; wlen:=1; buf_ptr:=0; repeat incr(buf_ptr); c:=buf[buf_ptr]; case xclass[c] of space_class: goto found; digit_class: if wlen=1 then {global word weight} begin if xint[c]<>word_wt then wt_chg:=true; word_wt:=xint[c]; end else dotw[wlen]:=xint[c]; {dot weight} hyf_class: dots[wlen]:=xint[c]; {record the dot |c|} letter_class: {record the letter |c|} begin incr(wlen); if wlen=max_len then begin print_buf; overflow('word length=',max_len:1); end; word[wlen]:=xint[c]; dots[wlen]:=no_hyf; dotw[wlen]:=word_wt; end; escape_class: {record a multi-character sequence starting with |c|} begin incr(wlen); if wlen=max_len then begin print_buf; overflow('word length=',max_len:1); end; get_letter(word[wlen]); dots[wlen]:=no_hyf; dotw[wlen]:=word_wt; end; invalid_class: bad_input('Bad character'); @.Bad character@> end; until buf_ptr=max_buf_len; found: incr(wlen); word[wlen]:=edge_of_word; end; @y @p procedure read_word; label done, found; var c: text_char; i: word_index; @!t: trie_pointer; begin read_buf(dictionary); word[1]:=edge_of_word; wlen:=1; buf_ptr:=0; for i:=0 to max_len do hclass[i]:=0; {no hyphen is wanted anywhere until one is read} repeat incr(buf_ptr); c:=buf[buf_ptr]; case xclass[c] of space_class: goto found; digit_class: if wlen=1 then {global word weight} begin if xint[c]<>word_wt then wt_chg:=true; word_wt:=xint[c]; end else if hclass[wlen]>0 then {a digit after a hyphen is its class} begin if hyphen_class_num>xint[c] then hclass[wlen]:=xint[c] else bad_input('unexpected hyphen class!'); {use |bad_input|, like the other input errors here, so the offending line is reported} end 
else dotw[wlen]:=xint[c]; {dot weight} hyf_class: begin hclass[wlen]:=1; {a bare hyphen is class 1; a digit right after it may replace this} end; letter_class: {record the letter |c|} begin incr(wlen); if wlen=max_len then begin print_buf; overflow('word length=',max_len:1); end; word[wlen]:=xint[c]; hclass[wlen]:=0; dotw[wlen]:=word_wt; end; escape_class: {record a multi-character sequence starting with |c|} begin incr(wlen); if wlen=max_len then begin print_buf; overflow('word length=',max_len:1); end; get_letter(word[wlen]); {|hclass[wlen]| is still 0 from the clearing loop at the start} dotw[wlen]:=word_wt; end; invalid_class: bad_input('Bad character'); @.Bad character@> end; until buf_ptr=max_buf_len; found: incr(wlen); word[wlen]:=edge_of_word; end; @z @x l.1628 @ The |change_dots| procedure updates the |dots| array representing the printing values of the hyphens. Initially, hyphens (and correctly found hyphens) in the word list are represented by |is_hyf| whereas non-hyphen positions (and erroneous hyphens) are represented by |no_hyf|. A Here these values are increased by one for each hyphen found by the current patterns, thus changing |no_hyf| into |err_hyf| and |is_hyf| into |found_hyf|. The routine also collects statistics about the number of good, bad, and missed hyphens. @d incr_wt(#)==Incr(#)(dotw[dpos]) @p procedure change_dots; var dpos: word_index; begin for dpos:=wlen-hyf_max downto hyf_min do begin if odd(hval[dpos]) then incr(dots[dpos]); if dots[dpos]=found_hyf then incr_wt(good_count) else if dots[dpos]=err_hyf then incr_wt(bad_count) else if dots[dpos]=is_hyf then incr_wt(miss_count); end; end; @y @ The |change_dots| procedure owes its name to the fact that its job was once to update an array called |dots| representing the printing values of the hyphens. This is no longer the case, as |dots| is gone. The routine collects statistics about the number of good, bad, and missed hyphens. 
@d incr_wt(#)==Incr(#)(dotw[dpos]) @p procedure change_dots; var dpos: word_index; have: integer; begin for dpos:=wlen-hyf_max downto hyf_min do begin have:=hyphen_class(hval[dpos]); {hyphen class produced by the current patterns at this position} {good/bad/miss statistics} if have>0 then if have=hclass[dpos] then incr_wt(good_count) else incr_wt(bad_count) else if hclass[dpos]>0 then incr_wt(miss_count); {off statistics} if have+hclass[dpos]>0 then if abs(have-hclass[dpos])<=3 then incr_wt(off_count[have-hclass[dpos]+5]) {a difference of 0 lands in slot 5} else if haveno_hyf then write(pattmp,xhyf[dots[dpos]]); if dotw[dpos]<>word_wt then write(pattmp,xdig[dotw[dpos]]); end; write_letter(word[wlen-1])(pattmp); write_ln(pattmp,xext[word[wlen-1]]); end; @y @ The following procedure outputs the word as hyphenated by the current patterns, including the found hyphen classes. A correct hyphen is shown with |found_hyf|, an incorrect one with |err_hyf|. Hyphens inhibited by the values of \.{\\lefthyphenmin} and \.{\\righthyphenmin} are {\it not} shown. @p procedure output_hyphenated_word; var dpos: word_index;@/ @!l: triec_pointer; {for |write_letter|} begin for dpos:=2 to hyf_min-1 do {leading letters: written without any hyphen marks} begin write_letter(word[dpos])(pattmp); write(pattmp,xext[word[dpos]]); end; for dpos:=hyf_min to wlen-hyf_max do {positions where a hyphen may be shown} begin write_letter(word[dpos])(pattmp); write(pattmp,xext[word[dpos]]); if hyphen_class(hval[dpos])>0 then begin if hyphen_class(hval[dpos])=hclass[dpos] then write(pattmp,xhyf[found_hyf]) else write(pattmp,xhyf[err_hyf]); if hyphen_class(hval[dpos])>1 then {class 1 is written as the bare hyphen character, higher classes get a digit} write(pattmp,xdig[hyphen_class(hval[dpos])]); end; end; for dpos:=wlen-hyf_max+1 to wlen-1 do {trailing letters: written without any hyphen marks} begin write_letter(word[dpos])(pattmp); write(pattmp,xext[word[dpos]]); end; write_ln(pattmp,''); end; @z @x l.1702 @ The globals |good_dot| and |bad_dot| will be set to |is_hyf| and |no_hyf|, or |err_hyf| and |found_hyf|, depending on whether the current level is odd or even, respectively. The globals |dot_min|, |dot_max|, and |dot_len| are analogous to |hyf_min|, |hyf_max|, and |hyf_len| defined earlier. 
@= @!good_dot, @!bad_dot: hyf_type; {good and bad hyphens at current level} @!dot_min, @!dot_max, @!dot_len: word_index; {limits for legal dots} @ @= if procesp then begin dot_min:=pat_dot; dot_max:=pat_len-pat_dot; if dot_min= @!dot_min, @!dot_max, @!dot_len: word_index; {limits for legal dots} @ @= if procesp then begin dot_min:=pat_dot; dot_max:=pat_len-pat_dot; if dot_min= if no_more[dpos] then goto continue; if dots[dpos]=good_dot then goodp:=true else if dots[dpos]=bad_dot then goodp:=false else goto continue; @y @= if no_more[dpos] then goto continue; have:=hyphen_class(hval[dpos]); get:=hyphen_class(hyph_level); if abs(get-hclass[dpos])abs(have-hclass[dpos]) then goodp:=false else goto continue; @z @x l.1750 @p procedure do_dictionary; begin good_count:=0; bad_count:=0; miss_count:=0; @y @p procedure do_dictionary; var i: integer; begin for i:=1 to 9 do begin off_count[i]:=0; end; good_count:=0; bad_count:=0; miss_count:=0; @z @x l.1771 if (good_count+miss_count)>0 then print_ln((100*good_count/(good_count+miss_count)):1:2,' %, ', (100*bad_count/(good_count+miss_count)):1:2,' %, ', (100*miss_count/(good_count+miss_count)):1:2,' %'); @y if (good_count+miss_count)>0 then print_ln((100*good_count/(good_count+miss_count)):1:2,' %, ', (100*bad_count/(good_count+miss_count)):1:2,' %, ', (100*miss_count/(good_count+miss_count)):1:2,' %'); print_ln('off by <-3, -3,...: ', off_count[1]:1, ', ', off_count[2]:1, ', ', off_count[3]:1, ', ', off_count[4]:1, ', ', off_count[5]:1, ', ', off_count[6]:1, ', ', off_count[7]:1, ', ', off_count[8]:1, ', ', off_count[9]:1); @z @x l.1835 digit_class: begin d:=xint[c]; if d>=max_val then bad_input('Bad hyphenation value'); @.Bad hyphenation value@> if d>max_pat then max_pat:=d; hval[pat_len]:=d; end; @y digit_class: begin d:=xint[c]; if buf_ptr<max_buf_len then {look ahead only while still inside |buf|; nested because |and| need not short-circuit} if xclass[buf[buf_ptr+1]]=digit_class then begin {a second digit makes this a two-digit value} incr(buf_ptr); c:=buf[buf_ptr]; d:=10*d+xint[c]; end; if d>=max_val then bad_input('Bad hyphenation value'); @.Bad hyphenation value@> if d>max_pat 
then max_pat:=d; hval[pat_len]:=d; end; @z