libtdepim

linklocator.cpp
1
22
23#include "linklocator.h"
24#include "pimemoticons.h"
25#include <tdeversion.h>
26#include <tdeglobal.h>
27#include <tdestandarddirs.h>
28#include <kstaticdeleter.h>
29#include <kmdcodec.h>
30#include <kdebug.h>
31
32#include <tqstylesheet.h>
33#include <tqfile.h>
34#include <tqregexp.h>
35
36#include <limits.h>
37
// Shared table that maps the text of a smiley to the base name of its image.
// It is allocated and filled by the first LinkLocator that gets constructed.
TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonNameMap = 0;
// Shared cache that maps the text of a smiley to the HTML generated for it
// by getEmoticon(); an empty entry records that no image could be loaded.
TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonHTMLCache = 0;

// Static deleters the constructor registers the two maps above with
// (presumably so that they are freed at shutdown -- see KStaticDeleter).
static KStaticDeleter< TQMap<TQString, TQString> > smileyMapDeleter;
static KStaticDeleter< TQMap<TQString, TQString> > smileyCacheDeleter;
43
44LinkLocator::LinkLocator(const TQString& text, int pos)
45 : mText(text), mPos(pos), mMaxUrlLen(4096), mMaxAddressLen(255)
46{
47 // If you change either of the above values for maxUrlLen or
48 // maxAddressLen, then please also update the documentation for
49 // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
50 // default values used for the maxUrlLen/maxAddressLen parameters
51 // of convertToHtml().
52
53 if ( !s_smileyEmoticonNameMap ) {
54 smileyMapDeleter.setObject( s_smileyEmoticonNameMap,
55 new TQMap<TQString, TQString>() );
56 for ( int i = 0; i < EmotIcons::EnumSindex::COUNT; ++i ) {
57 TQString imageName( EmotIcons::EnumSindex::enumToString[i] );
58 imageName.truncate( imageName.length() - 2 ); //remove the _0 bit
59 s_smileyEmoticonNameMap->insert( EmotIcons::smiley(i), imageName );
60 }
61 }
62
63 if ( !s_smileyEmoticonHTMLCache )
64 smileyCacheDeleter.setObject( s_smileyEmoticonHTMLCache,
65 new TQMap<TQString, TQString>() );
66}
67
69{
70 mMaxUrlLen = length;
71}
72
74{
75 return mMaxUrlLen;
76}
77
79{
80 mMaxAddressLen = length;
81}
82
84{
85 return mMaxAddressLen;
86}
87
89{
90 TQString url;
91 if(atUrl())
92 {
93 // handle cases like this: <link>http://foobar.org/</link>
94 int start = mPos;
95 while(mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' &&
96 TQString("<>()[]").find(mText[mPos]) == -1)
97 {
98 ++mPos;
99 }
100 /* some URLs really end with: # / & - _ */
101 const TQString allowedSpecialChars = TQString("#/&-_");
102 while(mPos > start && mText[mPos-1].isPunct() &&
103 allowedSpecialChars.find(mText[mPos-1]) == -1 )
104 {
105 --mPos;
106 }
107
108 url = mText.mid(start, mPos - start);
109 if(isEmptyUrl(url) || mPos - start > maxUrlLen())
110 {
111 mPos = start;
112 url = "";
113 }
114 else
115 {
116 --mPos;
117 }
118 }
119 return url;
120}
121
122// keep this in sync with KMMainWin::slotUrlClicked()
123bool LinkLocator::atUrl() const
124{
125 // the following characters are allowed in a dot-atom (RFC 2822):
126 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
127 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
128
129 // the character directly before the URL must not be a letter, a number or
130 // any other character allowed in a dot-atom (RFC 2822).
131 if( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() ||
132 ( allowedSpecialChars.find( mText[mPos-1] ) != -1 ) ) )
133 return false;
134
135 TQChar ch = mText[mPos];
136 return (ch=='h' && ( mText.mid(mPos, 7) == "http://" ||
137 mText.mid(mPos, 8) == "https://") ) ||
138 (ch=='v' && mText.mid(mPos, 6) == "vnc://") ||
139 (ch=='f' && ( mText.mid(mPos, 7) == "fish://" ||
140 mText.mid(mPos, 6) == "ftp://" ||
141 mText.mid(mPos, 7) == "ftps://") ) ||
142 (ch=='s' && ( mText.mid(mPos, 7) == "sftp://" ||
143 mText.mid(mPos, 6) == "smb://") ) ||
144 (ch=='m' && mText.mid(mPos, 7) == "mailto:") ||
145 (ch=='w' && mText.mid(mPos, 4) == "www.") ||
146 (ch=='f' && mText.mid(mPos, 4) == "ftp.") ||
147 (ch=='n' && mText.mid(mPos, 5) == "news:");
148 // note: no "file:" for security reasons
149}
150
151bool LinkLocator::isEmptyUrl(const TQString& url)
152{
153 return url.isEmpty() ||
154 url == "http://" ||
155 url == "https://" ||
156 url == "fish://" ||
157 url == "ftp://" ||
158 url == "ftps://" ||
159 url == "sftp://" ||
160 url == "smb://" ||
161 url == "vnc://" ||
162 url == "mailto" ||
163 url == "www" ||
164 url == "ftp" ||
165 url == "news" ||
166 url == "news://";
167}
168
170{
171 TQString address;
172
173 if ( mText[mPos] == '@' ) {
174 // the following characters are allowed in a dot-atom (RFC 2822):
175 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
176 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
177
178 // determine the local part of the email address
179 int start = mPos - 1;
180 while ( start >= 0 && mText[start].unicode() < 128 &&
181 ( mText[start].isLetterOrNumber() ||
182 mText[start] == '@' || // allow @ to find invalid email addresses
183 allowedSpecialChars.find( mText[start] ) != -1 ) ) {
184 if ( mText[start] == '@' )
185 return TQString(); // local part contains '@' -> no email address
186 --start;
187 }
188 ++start;
189 // we assume that an email address starts with a letter or a digit
190 while ( ( start < mPos ) && !mText[start].isLetterOrNumber() )
191 ++start;
192 if ( start == mPos )
193 return TQString(); // local part is empty -> no email address
194
195 // determine the domain part of the email address
196 int dotPos = INT_MAX;
197 int end = mPos + 1;
198 while ( end < (int)mText.length() &&
199 ( mText[end].isLetterOrNumber() ||
200 mText[end] == '@' || // allow @ to find invalid email addresses
201 mText[end] == '.' ||
202 mText[end] == '-' ) ) {
203 if ( mText[end] == '@' )
204 return TQString(); // domain part contains '@' -> no email address
205 if ( mText[end] == '.' )
206 dotPos = TQMIN( dotPos, end ); // remember index of first dot in domain
207 ++end;
208 }
209 // we assume that an email address ends with a letter or a digit
210 while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() )
211 --end;
212 if ( end == mPos )
213 return TQString(); // domain part is empty -> no email address
214 if ( dotPos >= end )
215 return TQString(); // domain part doesn't contain a dot
216
217 if ( end - start > maxAddressLen() )
218 return TQString(); // too long -> most likely no email address
219 address = mText.mid( start, end - start );
220
221 mPos = end - 1;
222 }
223 return address;
224}
225
226TQString LinkLocator::convertToHtml(const TQString& plainText, int flags,
227 int maxUrlLen, int maxAddressLen)
228{
229 LinkLocator locator(plainText);
230 locator.setMaxUrlLen(maxUrlLen);
232
233 TQString str;
234 TQString result((TQChar*)0, (int)locator.mText.length() * 2);
235 TQChar ch;
236 int x;
237 bool startOfLine = true;
238 TQString emoticon;
239
240 for (locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++)
241 {
242 ch = locator.mText[locator.mPos];
243 if ( flags & PreserveSpaces )
244 {
245 if (ch==' ')
246 {
247 if (startOfLine) {
248 result += "&nbsp;";
249 locator.mPos++, x++;
250 startOfLine = false;
251 }
252 while (locator.mText[locator.mPos] == ' ')
253 {
254 result += " ";
255 locator.mPos++, x++;
256 if (locator.mText[locator.mPos] == ' ') {
257 result += "&nbsp;";
258 locator.mPos++, x++;
259 }
260 }
261 locator.mPos--, x--;
262 continue;
263 }
264 else if (ch=='\t')
265 {
266 do
267 {
268 result += "&nbsp;";
269 x++;
270 }
271 while((x&7) != 0);
272 x--;
273 startOfLine = false;
274 continue;
275 }
276 }
277 if (ch=='\n')
278 {
279 result += "<br />";
280 startOfLine = true;
281 x = -1;
282 continue;
283 }
284
285 startOfLine = false;
286 if (ch=='&')
287 result += "&amp;";
288 else if (ch=='"')
289 result += "&quot;";
290 else if (ch=='<')
291 result += "&lt;";
292 else if (ch=='>')
293 result += "&gt;";
294 else
295 {
296 const int start = locator.mPos;
297 if ( !(flags & IgnoreUrls) ) {
298 str = locator.getUrl();
299 if (!str.isEmpty())
300 {
301 TQString hyperlink;
302 if(str.left(4) == "www.")
303 hyperlink = "http://" + str;
304 else if(str.left(4) == "ftp.")
305 hyperlink = "ftp://" + str;
306 else
307 hyperlink = str;
308
309 str = str.replace('&', "&amp;");
310 result += "<a href=\"" + hyperlink + "\">" + str + "</a>";
311 x += locator.mPos - start;
312 continue;
313 }
314 str = locator.getEmailAddress();
315 if(!str.isEmpty())
316 {
317 // len is the length of the local part
318 int len = str.find('@');
319 TQString localPart = str.left(len);
320
321 // remove the local part from the result (as '&'s have been expanded to
322 // &amp; we have to take care of the 4 additional characters per '&')
323 result.truncate(result.length() - len - (localPart.contains('&')*4));
324 x -= len;
325
326 result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
327 x += str.length() - 1;
328 continue;
329 }
330 }
331 if ( flags & ReplaceSmileys ) {
332 str = locator.getEmoticon();
333 if ( ! str.isEmpty() ) {
334 result += str;
335 x += locator.mPos - start;
336 continue;
337 }
338 }
339 if ( flags & HighlightText ) {
340 str = locator.highlightedText();
341 if ( !str.isEmpty() ) {
342 result += str;
343 x += locator.mPos - start;
344 continue;
345 }
346 }
347 result += ch;
348 }
349 }
350
351 return result;
352}
353
354TQString LinkLocator::pngToDataUrl( const TQString & iconPath )
355{
356 if ( iconPath.isEmpty() )
357 return TQString();
358
359 TQFile pngFile( iconPath );
360 if ( !pngFile.open( IO_ReadOnly | IO_Raw ) )
361 return TQString();
362
363 TQByteArray ba = pngFile.readAll();
364 pngFile.close();
365 return TQString::fromLatin1("data:image/png;base64,%1")
366 .arg( KCodecs::base64Encode( ba ).data() );
367}
368
369
370TQString LinkLocator::getEmoticon()
371{
372 // smileys have to be prepended by whitespace
373 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
374 return TQString();
375
376 // since smileys start with ':', ';', '(' or '8' short circuit method
377 const TQChar ch = mText[mPos];
378 if ( ch !=':' && ch != ';' && ch != '(' && ch != '8' )
379 return TQString();
380
381 // find the end of the smiley (a smiley is at most 4 chars long and ends at
382 // lineend or whitespace)
383 const int MinSmileyLen = 2;
384 const int MaxSmileyLen = 4;
385 int smileyLen = 1;
386 while ( ( smileyLen <= MaxSmileyLen ) &&
387 ( mPos+smileyLen < (int)mText.length() ) &&
388 !mText[mPos+smileyLen].isSpace() )
389 smileyLen++;
390 if ( smileyLen < MinSmileyLen || smileyLen > MaxSmileyLen )
391 return TQString();
392
393 const TQString smiley = mText.mid( mPos, smileyLen );
394 if ( !s_smileyEmoticonNameMap->contains( smiley ) )
395 return TQString(); // that's not a (known) smiley
396
397 TQString htmlRep;
398 if ( s_smileyEmoticonHTMLCache->contains( smiley ) ) {
399 htmlRep = (*s_smileyEmoticonHTMLCache)[smiley];
400 }
401 else {
402 const TQString imageName = (*s_smileyEmoticonNameMap)[smiley];
403
404#if KDE_IS_VERSION( 3, 3, 91 )
405 const TQString iconPath = locate( "emoticons",
406 EmotIcons::theme() +
407 TQString::fromLatin1( "/" ) +
408 imageName + TQString::fromLatin1(".png") );
409#else
410 const TQString iconPath = locate( "data",
411 TQString::fromLatin1( "kopete/pics/emoticons/" )+
412 EmotIcons::theme() +
413 TQString::fromLatin1( "/" ) +
414 imageName + TQString::fromLatin1(".png") );
415#endif
416
417 const TQString dataUrl = pngToDataUrl( iconPath );
418 if ( dataUrl.isEmpty() ) {
419 htmlRep = TQString();
420 }
421 else {
422 // create an image tag (the text in attribute alt is used
423 // for copy & paste) representing the smiley
424 htmlRep = TQString("<img class=\"pimsmileyimg\" src=\"%1\" "
425 "alt=\"%2\" title=\"%3\" width=\"16\" height=\"16\"/>")
426 .arg( dataUrl,
427 TQStyleSheet::escape( smiley ),
428 TQStyleSheet::escape( smiley ) );
429 }
430 s_smileyEmoticonHTMLCache->insert( smiley, htmlRep );
431 }
432
433 if ( !htmlRep.isEmpty() )
434 mPos += smileyLen - 1;
435
436 return htmlRep;
437}
438
439TQString LinkLocator::highlightedText()
440{
441 // formating symbols must be prepended with a whitespace
442 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
443 return TQString();
444
445 const TQChar ch = mText[mPos];
446 if ( ch != '/' && ch != '*' && ch != '_' )
447 return TQString();
448
449 TQRegExp re = TQRegExp( TQString("\\%1([0-9A-Za-z]+)\\%2").arg( ch ).arg( ch ) );
450 if ( re.search( mText, mPos ) == mPos ) {
451 uint length = re.matchedLength();
452 // there must be a whitespace after the closing formating symbol
453 if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() )
454 return TQString();
455 mPos += length - 1;
456 switch ( ch.latin1() ) {
457 case '*':
458 return "<b>" + re.cap( 1 ) + "</b>";
459 case '_':
460 return "<u>" + re.cap( 1 ) + "</u>";
461 case '/':
462 return "<i>" + re.cap( 1 ) + "</i>";
463 }
464 }
465 return TQString();
466}
467
int maxAddressLen() const
TQString getEmailAddress()
Attempts to grab an email address.
void setMaxUrlLen(int length)
Sets the maximum length of URLs that will be matched by getUrl().
static TQString pngToDataUrl(const TQString &iconPath)
Embed the given PNG image into a data URL.
void setMaxAddressLen(int length)
Sets the maximum length of email addresses that will be matched by getEmailAddress().
static TQString convertToHtml(const TQString &plainText, int flags=0, int maxUrlLen=4096, int maxAddressLen=255)
Converts plaintext into html.
int mPos
The current scan position.
int maxUrlLen() const
LinkLocator(const TQString &text, int pos=0)
Constructs a LinkLocator that will search a plaintext string from a given starting point.
TQString mText
The plaintext string being scanned for URLs and email addresses.
TQString getUrl()
Attempts to grab a URL starting at the current scan position.