package hirondelle.web4j.ui.translate;

import java.util.Locale;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import javax.servlet.jsp.JspException;

import hirondelle.web4j.BuildImpl;
import hirondelle.web4j.ui.tag.TagHelper;
import hirondelle.web4j.util.EscapeChars;
import hirondelle.web4j.util.Util;
import hirondelle.web4j.util.Consts;

/**
 Custom tag for translating base language text (or a "coder key") into a localized form, and applying
 "wiki-style" formatting.

 <P>This tag uses {@link hirondelle.web4j.ui.translate.Translator} and
 {@link hirondelle.web4j.request.LocaleSource} to localize text.

 <P>There are several use cases for this tag. In general, the attributes control:
 <ul>
 <li><a href="#BaseText">specifying base text</a> to be translated
 <li><a href="#WikiStyleFormatting">formatting</a> of the translated result
 <li><a href="#TurningOffTranslation">turning off translation</a> altogether
 </ul>

 <P><b><a name="BaseText">Specifying Base Text</a></b>
 <P>The base text may be specified simply as the tag body, or as the <tt>value</tt> attribute.
 <P>Example 1 : <br>
 <PRE>
 &lt;w:txt&gt;
   Of prayers, I am the prayer of silence.
   Of things that move not, I am the Himalayas.
 &lt;/w:txt&gt;
 </PRE>

 Example 2 uses a "coder key" : <br>
 <PRE>
 &lt;w:txt value="quotation.from.bhagavad.gita" /&gt;
 </PRE>

 <P>This second form is intended especially for translating items appearing
 inside a tag.

 <P>These two use cases are mutually exclusive : either a body must be specified, or
 the <tt>value</tt> attribute must be specified, <em>but not both</em>.

 <P>Here is another example, combining the two styles :
 <PRE>
 &lt;span title='&lt;w:txt value="quotation.from.bhagavad.gita" /&gt;'&gt;
   &lt;w:txt&gt;
     Of prayers, I am the prayer of silence.
     Of things that move not, I am the Himalayas.
   &lt;/w:txt&gt;
 &lt;/span&gt;
 </PRE>

 <P>In either case, the item to be translated may be either some text in the
 application's base language, or it may be a 'coder key' (see {@link Translator}).

 <P><a name="WikiStyleFormatting"><b>Formatting of the Result</b></a>
 <P>By default, this tag will escape all special characters using {@link EscapeChars#forHTML(String)}.
 Occasionally, it is desirable to allow some limited formatting of text input by the user. Even simple
 effects such as bold and italic can measurably increase legibility and clarity, and allowing links
 is also very useful. Most wikis allow such simple formatting. This tag uses the following special
 characters to denote various effects :
 <ul>
 <li> *bold* for <b>bold</b> (needs initial space before first '*')
 <li> _italic_ for <em>italic</em> (needs initial space before first '_')
 <li>^preserve formatting^ for preserving whitespace (&lt;PRE&gt;)
 <li>~~~~ on an otherwise blank line for a horizontal rule
 <li>[link:http:\\www.javapractices.com\Topic1.cjp enumerations] produces this link: <a href="http:\\www.javapractices.com\Topic1.cjp">enumerations</a>
 <li>one or more empty lines for a paragraph (&lt;P&gt;)
 <li>a bar | at the end of a line for a line break (&lt;BR&gt;)
 <li>a line starting with ' ~ ' for a bullet entry in a list
 </ul>

 <P>Example 3 has wiki style formatting:
 <PRE>
 &lt;w:txt wikiMarkup="true"&gt;
   Default Locale |
   _Not all Locales are treated equally_ : there *must* be ...
 &lt;/w:txt&gt;
 </PRE>

 Is rendered as :
 <P>Default Locale<br>
 <em>Not all Locales are treated equally</em> : there <b>must</b> be ...

 <P>To allow the above rules to be interpreted as HTML by this tag, set
 {@link #setWikiMarkup(boolean)} to <tt>true</tt>.

 <P><b><a name="TurningOffTranslation">Turning Off Translation</a></b>
 <P>There are two cases in which it is useful to turn off translation altogether:
 <ul>
 <li>using only the formatting services of this tag. For example, a message board application may want to
 provide basic wiki-style formatting, without any translation.
 <li>as a workaround for database issues regarding large text. Sometimes databases treat large
 text differently from small text. For example in MySQL, it is not possible for a <tt>TEXT</tt> field to be
 assigned a <tt>UNIQUE</tt> constraint. This split between large text and small text can be a problem, since
 it may mean that blocks of text are treated differently simply according to their length, and workarounds for
 large blocks of text are needed.
 </ul>

 <P>Example 4 has wiki style formatting for untranslated user input:
 <PRE>
 &lt;w:txt wikiMarkup="true" translate="false"&gt;
   ...render some user input with wiki style formatting...
 &lt;/w:txt&gt;
 </PRE>

 <P>Example 5 has wiki style formatting for <em>large</em> untranslated text, hard-coded in the JSP.
 An example of such text may be an extended section of "help" information :
 <PRE>
 &lt;w:txt locale="en"&gt;
   ..<em>large</em> amount of hard-coded text in English...
 &lt;/w:txt&gt;

 &lt;w:txt locale="fr"&gt;
   ..<em>large</em> amount of hard-coded text in French...
 &lt;/w:txt&gt;
 </PRE>

 <em>The above style is outside the usual translation mechanism.</em> It does not use the configured
 {@link Translator}. It does not translate its content at all. Rather, it will echo the tag content only
 when the specified <tt>locale</tt> matches that returned by the {@link hirondelle.web4j.request.LocaleSource}.
 <span class="highlight">It is recommended that this style be used only when the above-mentioned problem regarding
 database text size exists.</span> This style implicitly has <tt>translate="false"</tt>.
*/
public final class Text extends TagHelper {

  /**
   Set the item to be translated (optional).

   <P><tt>aTextAsAttr</tt> takes two forms : user-visible text in the base language,
   or a coder key (known to the programmer, but not seen by the end user). See {@link Translator}
   for more information.

   <P>If this attribute is set, then the tag must <em>not</em> have a body.

   @param aTextAsAttr must have content; this value is always trimmed by this method.
  */
  public void setValue(String aTextAsAttr){
    checkForContent("Value", aTextAsAttr);
    fTextAsAttr = aTextAsAttr.trim();
  }

  /**
   Toggle translation on and off (optional, default <tt>true</tt>).

   <P>By default, text will be translated. An example of setting this item to <tt>false</tt> is a
   discussion board, where users input in a single language, and
   <a href="#WikiStyleFormatting">wiki style formatting</a> is desired.

   <P>An example of rendering such text is :
   <PRE>
   &lt;w:txt translate="false" wikiMarkup="true"&gt;
     This is *bold*, and so on...
   &lt;/w:txt&gt;
   </PRE>

   @param aValue <tt>true</tt> to pass the text through the {@link Translator}, <tt>false</tt> to
   emit it untranslated.
  */
  public void setTranslate(boolean aValue) {
    fIsTranslating = aValue;
  }

  /**
   Specify an explicit {@link Locale} (optional).

   <P><span class='highlight'>When this attribute is specified, this tag will never translate its content.</span>
   Instead, this tag will simply <em>emit or suppress</em> its content, according to whether <tt>aLocale</tt> matches
   that returned by {@link hirondelle.web4j.request.LocaleSource}.

   <P>As a side effect, calling this method turns translation off.

   @param aLocale text form of the target locale, converted using {@link Util#buildLocale(String)}.
  */
  public void setLocale(String aLocale){
    fSpecificLocale = Util.buildLocale(aLocale);
    fIsTranslating = false;
  }

  /**
   Allow <a href="#WikiStyleFormatting">wiki style formatting</a> to be used (optional, default <tt>false</tt>).

   @param aValue <tt>true</tt> to convert wiki markup in the (escaped) text into HTML.
  */
  public void setWikiMarkup(boolean aValue){
    fHasWikiMarkup = aValue;
  }

  /**
   Validate attributes against each other.

   <P>An explicit locale and translation are mutually exclusive. Since {@link #setLocale(String)}
   itself turns translation off, the conflict arises only when translation is turned back on
   afterwards.

   @throws IllegalArgumentException if a locale has been specified while translation is on.
  */
  protected void crossCheckAttributes() {
    if( fSpecificLocale != null && fIsTranslating ){
      String message = "Cannot translate and specify a Locale at the same time. Page : " + getPageName();
      fLogger.severe(message);
      throw new IllegalArgumentException(message);
    }
  }

  /**
   Return the text to be emitted in place of this tag. See class comment.

   <P>The base text is taken from the <tt>value</tt> attribute if present, and otherwise from the tag body.
   It is then either translated, echoed, or suppressed (when an explicit locale does not match the
   request's locale), and finally passed to {@link #processMarkup(String)}.

   @param aOriginalBody the tag body; may be empty when the <tt>value</tt> attribute is used.
   @return the escaped (and possibly wiki-formatted) text; an empty <tt>String</tt> if there is nothing to emit.
   @throws JspException if both the <tt>value</tt> attribute and a tag body have content.
  */
  @Override protected String getEmittedText(String aOriginalBody) throws JspException {
    if ( Util.textHasContent(fTextAsAttr) && Util.textHasContent(aOriginalBody) ){
      throw new JspException(
        "Please specify text (or key) to be translated as either value attribute or tag body, but not both." +
        " Value attribute: " +Util.quote(fTextAsAttr) + ". Tag body: " + Util.quote(aOriginalBody) +
        " Page Name :" + getPageName()
      );
    }

    String baseText = Util.textHasContent(fTextAsAttr) ? fTextAsAttr : aOriginalBody;
    String result = Consts.EMPTY_STRING;
    if(Util.textHasContent(baseText)){
      Locale locale = BuildImpl.forLocaleSource().get(getRequest());
      if(fIsTranslating){
        Translator translator = BuildImpl.forTranslator();
        result = translator.get(baseText, locale);
        fLogger.finest("Translating base text : " + Util.quote(baseText) + " using Locale " + locale + " into " + Util.quote(result));
      }
      else {
        fLogger.finest("LocaleSource: " + locale + ", locale attribute: " + fSpecificLocale);
        //with no explicit locale, untranslated content is always echoed
        if (fSpecificLocale == null || fSpecificLocale.equals(locale)){
          fLogger.finest("Echoing tag content (possibly adding formatting).");
          result = baseText;
        }
        else {
          fLogger.finest("Suppressing tag content.");
          result = Consts.EMPTY_STRING;
        }
      }
      result = processMarkup(result);
    }
    return result;
  }

  /**
   Translate the text into a result. Escapes characters, then optionally changes wiki markup into hypertext.
   Made package-private for testing purposes.

   @param aText text to be rendered; returned as is when it has no content.
   @return the escaped text, with wiki markup converted to HTML when wiki markup is enabled.
  */
  String processMarkup(String aText) {
    String result = aText;
    if(Util.textHasContent(result)){
      //escaping comes first : the wiki regexes below are applied to the ESCAPED text
      result = EscapeChars.forHTML(result);
      if( fHasWikiMarkup ){
        result = changePseudoMarkupToHTML(result);
      }
    }
    return result;
  }

  // PRIVATE
  private String fTextAsAttr;
  private boolean fIsTranslating = true;
  private Locale fSpecificLocale;
  private boolean fHasWikiMarkup = false;
  private static final Logger fLogger = Util.getLogger(Text.class);

  /*
   The regexes don't refer only to the original '*' and so on. Rather, they refer to the
   escaped versions thereof - their 'fingerprints', so to speak - since processMarkup escapes
   the text before the wiki markup is interpreted.

   NOTE(review): the exact output of EscapeChars.forHTML is not visible from this file. Each
   special character is therefore matched either in its raw form, or as a decimal character
   entity with optional zero padding (for '*', both &#42; and &#042; match). Confirm against
   EscapeChars, and tighten these fragments if desired.

   Implementation Note:
   Seeing undesired wiki-formatting of _blah_ text in links. To fix, introduced second, small variation
   for *blah* and _blah_, which matches to the beginning of the input.

   This is rather hacky, and a more robust implementation would use javacc.
  */

  private static final String LEFT_BRACKET = marker('[');
  private static final String RIGHT_BRACKET = marker(']');
  private static final String COLON = marker(':');
  private static final String STAR = marker('*');
  private static final String UNDERSCORE = marker('_');
  private static final String CARET = marker('^');
  private static final String TILDE = marker('~');
  private static final String BAR = marker('|');

  /** Group 1 is the link target, group 3 is the link text. */
  private static final Pattern PSEUDO_LINK = Pattern.compile(LEFT_BRACKET + "link" + COLON + "((\\S)*) ((.)+?)" + RIGHT_BRACKET);
  private static final Pattern PSEUDO_BOLD = Pattern.compile("(?: " + STAR + ")((?:.)+?)" + STAR);
  private static final Pattern PSEUDO_BOLD_START_OF_INPUT = Pattern.compile("(?:^" + STAR + ")((?:.)+?)" + STAR);
  private static final Pattern PSEUDO_ITALIC = Pattern.compile("(?: " + UNDERSCORE + ")((?:.)+?)" + UNDERSCORE);
  private static final Pattern PSEUDO_ITALIC_START_OF_INPUT = Pattern.compile("(?:^" + UNDERSCORE + ")((?:.)+?)" + UNDERSCORE);
  private static final Pattern PSEUDO_CODE = Pattern.compile(CARET + "((?:.)+?)" + CARET, Pattern.MULTILINE | Pattern.DOTALL);
  private static final Pattern PSEUDO_HR = Pattern.compile("^(?:\\s)*" + TILDE + "{4}(?:\\s)*$", Pattern.MULTILINE);
  private static final Pattern PSEUDO_PARAGRAPH = Pattern.compile("(^(?:\\s)*$)", Pattern.MULTILINE);
  private static final Pattern PSEUDO_LINE_BREAK = Pattern.compile(BAR + "(?: )*$", Pattern.MULTILINE);
  private static final Pattern PSEUDO_LIST = Pattern.compile("^(?: )*(" + TILDE + ")(?: )", Pattern.MULTILINE);

  private static final String PARAGRAPH = "<P>";

  /**
   Return a non-capturing regex fragment matching a single wiki marker character, either in
   its raw form or as a decimal character entity (with or without leading zeros).
  */
  private static String marker(char aChar){
    return "(?:" + Pattern.quote(String.valueOf(aChar)) + "|&#0*" + (int)aChar + ";)";
  }

  /** Apply all wiki-style conversions, in a fixed order, to already-escaped text. */
  private String changePseudoMarkupToHTML(String aText){
    String result = addLink(aText);
    result = addBold(result);
    result = addBold2(result);
    result = addItalic(result);
    result = addItalic2(result);
    result = addLineBreak(result);
    result = addParagraph(result);
    result = addList(result);
    result = addCode(result);
    result = addHorizontalRule(result);
    return result;
  }

  private String addBold(String aText){
    Matcher matcher = PSEUDO_BOLD.matcher(aText);
    return matcher.replaceAll(" <b>$1</b>"); //extra space : puts back the leading space consumed by the match
  }

  private String addBold2(String aText){
    Matcher matcher = PSEUDO_BOLD_START_OF_INPUT.matcher(aText);
    return matcher.replaceAll("<b>$1</b>"); //no leading space at the start of the input
  }

  private String addItalic(String aText){
    Matcher matcher = PSEUDO_ITALIC.matcher(aText);
    return matcher.replaceAll(" <em>$1</em>"); //extra space : puts back the leading space consumed by the match
  }

  private String addItalic2(String aText){
    Matcher matcher = PSEUDO_ITALIC_START_OF_INPUT.matcher(aText);
    return matcher.replaceAll("<em>$1</em>"); //no leading space at the start of the input
  }

  private String addCode(String aText){
    Matcher matcher = PSEUDO_CODE.matcher(aText);
    return matcher.replaceAll("<PRE>$1</PRE>");
  }

  private String addHorizontalRule(String aText){
    Matcher matcher = PSEUDO_HR.matcher(aText);
    return matcher.replaceAll("<hr>");
  }

  private String addLink(String aText){
    Matcher matcher = PSEUDO_LINK.matcher(aText);
    return matcher.replaceAll("<a href='$1'>$3</a>");
  }

  private String addParagraph(String aText){
    Matcher matcher = PSEUDO_PARAGRAPH.matcher(aText);
    return removeInitialAndFinalParagraphs(matcher.replaceAll(PARAGRAPH));
  }

  /**
   Bit hacky - cannot find correct regex to take care of this cleanly.

   <P>Trims the text, then removes one leading and one trailing paragraph marker, if present.
   The checks are made against the trimmed text, such that markers adjacent to leading or trailing
   whitespace are also removed, and text consisting only of a single marker yields an empty
   <tt>String</tt> instead of failing.
  */
  private String removeInitialAndFinalParagraphs(String aText){
    String result = aText.trim();
    if (result.startsWith(PARAGRAPH)){
      result = result.substring(PARAGRAPH.length());
    }
    if (result.endsWith(PARAGRAPH)){
      result = result.substring(0, result.length() - PARAGRAPH.length());
    }
    return result;
  }

  private String addLineBreak(String aText){
    Matcher matcher = PSEUDO_LINE_BREAK.matcher(aText);
    return matcher.replaceAll("<BR>");
  }

  private String addList(String aText){
    Matcher matcher = PSEUDO_LIST.matcher(aText);
    //named entities keep the output independent of source-file and page encodings;
    //another version of bull has a non-standard number
    return matcher.replaceAll("<br>&nbsp;&bull;&nbsp;");
  }
}