summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> 2005-08-23 15:11:24 +0000
committer: lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> 2005-08-23 15:11:24 +0000
commit: b9d35dc1143395935c8da9b4de87cabde1935c37 (patch)
tree: 7d8d0563149bca4dd5f3d6efa80b75f0c11e19be /lib
parent: e26f03cfd01f24c80a4af856e17294c8a459a319 (diff)
download: feed2imap-b9d35dc1143395935c8da9b4de87cabde1935c37.tar.gz
feed2imap-b9d35dc1143395935c8da9b4de87cabde1935c37.tar.bz2
feed2imap-b9d35dc1143395935c8da9b4de87cabde1935c37.zip
better handling of feeds with HTML entities.
git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@54 f70e237a-67f3-0310-a06c-d2b8a7116972
Diffstat (limited to 'lib')
-rw-r--r-- lib/feed2imap/textconverters.rb | 23
1 files changed, 23 insertions, 0 deletions
diff --git a/lib/feed2imap/textconverters.rb b/lib/feed2imap/textconverters.rb
index ba3813a..418083f 100644
--- a/lib/feed2imap/textconverters.rb
+++ b/lib/feed2imap/textconverters.rb
@@ -27,10 +27,33 @@ class String
return (self =~ /<p>/) || (self =~ /<br>/) || (self =~ /<br\s*(\/)?\s*>/)
end
# Tells whether the text looks like HTML whose markup was escaped with
# HTML entities.
#
# Three markers are tried in order: an escaped "<img src=", an escaped
# "<a href=", and an escaped "<br>", "<br/>" or "<br />". The result is the
# match offset of the first marker that is found (truthy, possibly 0), or
# nil when none of them occurs.
def escaped_html?
  [/&lt;img src=/, /&lt;a href=/, /&lt;br(\/| \/|)&gt;/].each do |marker|
    offset = self =~ marker
    return offset if offset
  end
  nil
end
+
# Un-escape HTML in the text, in place.
#
# Replaces the entities &lt; &gt; &apos; &#39; &quot; and &amp; with the
# characters they stand for. The receiver itself is modified.
#
# The table is keyed by entity rather than by character: both &apos; and
# &#39; map to the apostrophe, and a character-keyed hash literal can hold
# only one of them (the duplicate key silently overwrites the other).
#
# All entities are replaced in a single pass, so the result does not depend
# on hash iteration order and text produced by one replacement is never
# un-escaped a second time ("&amp;lt;" becomes "&lt;", not "<").
#
# Returns self.
def unescape_html
  entities = {
    '&lt;'   => '<',
    '&gt;'   => '>',
    '&apos;' => "'",
    '&#39;'  => "'",
    '&quot;' => '"',
    '&amp;'  => '&'
  }
  # gsub! returns nil when nothing matched, so always return self explicitly.
  gsub!(/&(?:lt|gt|apos|#39|quot|amp);/) { |entity| entities[entity] }
  self
end
+
# convert text to HTML
def text2html
text = self.clone
return text if text.html?
+ if text.escaped_html?
+ return text.unescape_html
+ end
# paragraphs
text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")