swift 在Swift中解码HTML实体
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了swift 在Swift中解码HTML实体相关的知识,希望对你有一定的参考价值。
import Foundation
// Very slightly adapted from http://stackoverflow.com/a/30141700/106244
// 99.99% Credit to Martin R!
// Mapping from XML/HTML character entity reference to character
// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
// Lookup table from a complete character entity reference -- including the
// leading "&" and the trailing ";" -- to the character it denotes.
// Keys are case-sensitive ("&Dagger;" and "&dagger;" are distinct entries).
// The keys must be the literal entity text, because that is what the decoder
// extracts from the input string and looks up here.
private let characterEntities : [String: Character] = [
    // XML predefined entities:
    "&quot;"     : "\"",
    "&amp;"      : "&",
    "&apos;"     : "'",
    "&lt;"       : "<",
    "&gt;"       : ">",

    // HTML character entity references:
    "&nbsp;"     : "\u{00A0}",
    "&iexcl;"    : "\u{00A1}",
    "&cent;"     : "\u{00A2}",
    "&pound;"    : "\u{00A3}",
    "&curren;"   : "\u{00A4}",
    "&yen;"      : "\u{00A5}",
    "&brvbar;"   : "\u{00A6}",
    "&sect;"     : "\u{00A7}",
    "&uml;"      : "\u{00A8}",
    "&copy;"     : "\u{00A9}",
    "&ordf;"     : "\u{00AA}",
    "&laquo;"    : "\u{00AB}",
    "&not;"      : "\u{00AC}",
    "&shy;"      : "\u{00AD}",
    "&reg;"      : "\u{00AE}",
    "&macr;"     : "\u{00AF}",
    "&deg;"      : "\u{00B0}",
    "&plusmn;"   : "\u{00B1}",
    "&sup2;"     : "\u{00B2}",
    "&sup3;"     : "\u{00B3}",
    "&acute;"    : "\u{00B4}",
    "&micro;"    : "\u{00B5}",
    "&para;"     : "\u{00B6}",
    "&middot;"   : "\u{00B7}",
    "&cedil;"    : "\u{00B8}",
    "&sup1;"     : "\u{00B9}",
    "&ordm;"     : "\u{00BA}",
    "&raquo;"    : "\u{00BB}",
    "&frac14;"   : "\u{00BC}",
    "&frac12;"   : "\u{00BD}",
    "&frac34;"   : "\u{00BE}",
    "&iquest;"   : "\u{00BF}",
    "&Agrave;"   : "\u{00C0}",
    "&Aacute;"   : "\u{00C1}",
    "&Acirc;"    : "\u{00C2}",
    "&Atilde;"   : "\u{00C3}",
    "&Auml;"     : "\u{00C4}",
    "&Aring;"    : "\u{00C5}",
    "&AElig;"    : "\u{00C6}",
    "&Ccedil;"   : "\u{00C7}",
    "&Egrave;"   : "\u{00C8}",
    "&Eacute;"   : "\u{00C9}",
    "&Ecirc;"    : "\u{00CA}",
    "&Euml;"     : "\u{00CB}",
    "&Igrave;"   : "\u{00CC}",
    "&Iacute;"   : "\u{00CD}",
    "&Icirc;"    : "\u{00CE}",
    "&Iuml;"     : "\u{00CF}",
    "&ETH;"      : "\u{00D0}",
    "&Ntilde;"   : "\u{00D1}",
    "&Ograve;"   : "\u{00D2}",
    "&Oacute;"   : "\u{00D3}",
    "&Ocirc;"    : "\u{00D4}",
    "&Otilde;"   : "\u{00D5}",
    "&Ouml;"     : "\u{00D6}",
    "&times;"    : "\u{00D7}",
    "&Oslash;"   : "\u{00D8}",
    "&Ugrave;"   : "\u{00D9}",
    "&Uacute;"   : "\u{00DA}",
    "&Ucirc;"    : "\u{00DB}",
    "&Uuml;"     : "\u{00DC}",
    "&Yacute;"   : "\u{00DD}",
    "&THORN;"    : "\u{00DE}",
    "&szlig;"    : "\u{00DF}",
    "&agrave;"   : "\u{00E0}",
    "&aacute;"   : "\u{00E1}",
    "&acirc;"    : "\u{00E2}",
    "&atilde;"   : "\u{00E3}",
    "&auml;"     : "\u{00E4}",
    "&aring;"    : "\u{00E5}",
    "&aelig;"    : "\u{00E6}",
    "&ccedil;"   : "\u{00E7}",
    "&egrave;"   : "\u{00E8}",
    "&eacute;"   : "\u{00E9}",
    "&ecirc;"    : "\u{00EA}",
    "&euml;"     : "\u{00EB}",
    "&igrave;"   : "\u{00EC}",
    "&iacute;"   : "\u{00ED}",
    "&icirc;"    : "\u{00EE}",
    "&iuml;"     : "\u{00EF}",
    "&eth;"      : "\u{00F0}",
    "&ntilde;"   : "\u{00F1}",
    "&ograve;"   : "\u{00F2}",
    "&oacute;"   : "\u{00F3}",
    "&ocirc;"    : "\u{00F4}",
    "&otilde;"   : "\u{00F5}",
    "&ouml;"     : "\u{00F6}",
    "&divide;"   : "\u{00F7}",
    "&oslash;"   : "\u{00F8}",
    "&ugrave;"   : "\u{00F9}",
    "&uacute;"   : "\u{00FA}",
    "&ucirc;"    : "\u{00FB}",
    "&uuml;"     : "\u{00FC}",
    "&yacute;"   : "\u{00FD}",
    "&thorn;"    : "\u{00FE}",
    "&yuml;"     : "\u{00FF}",
    "&OElig;"    : "\u{0152}",
    "&oelig;"    : "\u{0153}",
    "&Scaron;"   : "\u{0160}",
    "&scaron;"   : "\u{0161}",
    "&Yuml;"     : "\u{0178}",
    "&fnof;"     : "\u{0192}",
    "&circ;"     : "\u{02C6}",
    "&tilde;"    : "\u{02DC}",
    "&Alpha;"    : "\u{0391}",
    "&Beta;"     : "\u{0392}",
    "&Gamma;"    : "\u{0393}",
    "&Delta;"    : "\u{0394}",
    "&Epsilon;"  : "\u{0395}",
    "&Zeta;"     : "\u{0396}",
    "&Eta;"      : "\u{0397}",
    "&Theta;"    : "\u{0398}",
    "&Iota;"     : "\u{0399}",
    "&Kappa;"    : "\u{039A}",
    "&Lambda;"   : "\u{039B}",
    "&Mu;"       : "\u{039C}",
    "&Nu;"       : "\u{039D}",
    "&Xi;"       : "\u{039E}",
    "&Omicron;"  : "\u{039F}",
    "&Pi;"       : "\u{03A0}",
    "&Rho;"      : "\u{03A1}",
    "&Sigma;"    : "\u{03A3}",
    "&Tau;"      : "\u{03A4}",
    "&Upsilon;"  : "\u{03A5}",
    "&Phi;"      : "\u{03A6}",
    "&Chi;"      : "\u{03A7}",
    "&Psi;"      : "\u{03A8}",
    "&Omega;"    : "\u{03A9}",
    "&alpha;"    : "\u{03B1}",
    "&beta;"     : "\u{03B2}",
    "&gamma;"    : "\u{03B3}",
    "&delta;"    : "\u{03B4}",
    "&epsilon;"  : "\u{03B5}",
    "&zeta;"     : "\u{03B6}",
    "&eta;"      : "\u{03B7}",
    "&theta;"    : "\u{03B8}",
    "&iota;"     : "\u{03B9}",
    "&kappa;"    : "\u{03BA}",
    "&lambda;"   : "\u{03BB}",
    "&mu;"       : "\u{03BC}",
    "&nu;"       : "\u{03BD}",
    "&xi;"       : "\u{03BE}",
    "&omicron;"  : "\u{03BF}",
    "&pi;"       : "\u{03C0}",
    "&rho;"      : "\u{03C1}",
    "&sigmaf;"   : "\u{03C2}",
    "&sigma;"    : "\u{03C3}",
    "&tau;"      : "\u{03C4}",
    "&upsilon;"  : "\u{03C5}",
    "&phi;"      : "\u{03C6}",
    "&chi;"      : "\u{03C7}",
    "&psi;"      : "\u{03C8}",
    "&omega;"    : "\u{03C9}",
    "&thetasym;" : "\u{03D1}",
    "&upsih;"    : "\u{03D2}",
    "&piv;"      : "\u{03D6}",
    "&ensp;"     : "\u{2002}",
    "&emsp;"     : "\u{2003}",
    "&thinsp;"   : "\u{2009}",
    "&zwnj;"     : "\u{200C}",
    "&zwj;"      : "\u{200D}",
    "&lrm;"      : "\u{200E}",
    "&rlm;"      : "\u{200F}",
    "&ndash;"    : "\u{2013}",
    "&mdash;"    : "\u{2014}",
    "&lsquo;"    : "\u{2018}",
    "&rsquo;"    : "\u{2019}",
    "&sbquo;"    : "\u{201A}",
    "&ldquo;"    : "\u{201C}",
    "&rdquo;"    : "\u{201D}",
    "&bdquo;"    : "\u{201E}",
    "&dagger;"   : "\u{2020}",
    "&Dagger;"   : "\u{2021}",
    "&bull;"     : "\u{2022}",
    "&hellip;"   : "\u{2026}",
    "&permil;"   : "\u{2030}",
    "&prime;"    : "\u{2032}",
    "&Prime;"    : "\u{2033}",
    "&lsaquo;"   : "\u{2039}",
    "&rsaquo;"   : "\u{203A}",
    "&oline;"    : "\u{203E}",
    "&frasl;"    : "\u{2044}",
    "&euro;"     : "\u{20AC}",
    "&image;"    : "\u{2111}",
    "&weierp;"   : "\u{2118}",
    "&real;"     : "\u{211C}",
    "&trade;"    : "\u{2122}",
    "&alefsym;"  : "\u{2135}",
    "&larr;"     : "\u{2190}",
    "&uarr;"     : "\u{2191}",
    "&rarr;"     : "\u{2192}",
    "&darr;"     : "\u{2193}",
    "&harr;"     : "\u{2194}",
    "&crarr;"    : "\u{21B5}",
    "&lArr;"     : "\u{21D0}",
    "&uArr;"     : "\u{21D1}",
    "&rArr;"     : "\u{21D2}",
    "&dArr;"     : "\u{21D3}",
    "&hArr;"     : "\u{21D4}",
    "&forall;"   : "\u{2200}",
    "&part;"     : "\u{2202}",
    "&exist;"    : "\u{2203}",
    "&empty;"    : "\u{2205}",
    "&nabla;"    : "\u{2207}",
    "&isin;"     : "\u{2208}",
    "&notin;"    : "\u{2209}",
    "&ni;"       : "\u{220B}",
    "&prod;"     : "\u{220F}",
    "&sum;"      : "\u{2211}",
    "&minus;"    : "\u{2212}",
    "&lowast;"   : "\u{2217}",
    "&radic;"    : "\u{221A}",
    "&prop;"     : "\u{221D}",
    "&infin;"    : "\u{221E}",
    "&ang;"      : "\u{2220}",
    "&and;"      : "\u{2227}",
    "&or;"       : "\u{2228}",
    "&cap;"      : "\u{2229}",
    "&cup;"      : "\u{222A}",
    "&int;"      : "\u{222B}",
    "&there4;"   : "\u{2234}",
    "&sim;"      : "\u{223C}",
    "&cong;"     : "\u{2245}",
    "&asymp;"    : "\u{2248}",
    "&ne;"       : "\u{2260}",
    "&equiv;"    : "\u{2261}",
    "&le;"       : "\u{2264}",
    "&ge;"       : "\u{2265}",
    "&sub;"      : "\u{2282}",
    "&sup;"      : "\u{2283}",
    "&nsub;"     : "\u{2284}",
    "&sube;"     : "\u{2286}",
    "&supe;"     : "\u{2287}",
    "&oplus;"    : "\u{2295}",
    "&otimes;"   : "\u{2297}",
    "&perp;"     : "\u{22A5}",
    "&sdot;"     : "\u{22C5}",
    "&lceil;"    : "\u{2308}",
    "&rceil;"    : "\u{2309}",
    "&lfloor;"   : "\u{230A}",
    "&rfloor;"   : "\u{230B}",
    "&lang;"     : "\u{2329}",
    "&rang;"     : "\u{232A}",
    "&loz;"      : "\u{25CA}",
    "&spades;"   : "\u{2660}",
    "&clubs;"    : "\u{2663}",
    "&hearts;"   : "\u{2665}",
    "&diams;"    : "\u{2666}",
]
extension String {
    /// Returns a new string made by replacing in the `String`
    /// all HTML character entity references with the corresponding
    /// character.
    public var stringByDecodingHTMLEntities: String {
        return decodeHTMLEntities().decodedString
    }

    /// Returns a tuple containing the string made by replacing in the
    /// `String` all HTML character entity references with the corresponding
    /// character, together with an array describing every replacement made.
    ///
    /// Each element of `replacementOffsets` holds
    ///  - `index`:  the index in the *original* string just past the
    ///              terminating ";" of the replaced entity, and
    ///  - `offset`: the change in length caused by the replacement, i.e.
    ///              `1 - (length of the entity text)`, which is never positive.
    ///
    /// This allows callers to correctly adjust any attributes that may be
    /// associated with substrings of the `String`.
    ///
    /// Text that looks like an entity but cannot be decoded (unknown name,
    /// numeric value that is not a valid Unicode scalar) is copied verbatim.
    func decodeHTMLEntities() -> (decodedString: String, replacementOffsets: [(index: String.Index, offset: String.Index.Distance)]) {

        // ===== Utility functions =====

        // Record the index offsets of each replacement.
        // This allows anyone to correctly adjust any attributes that may be
        // associated with substrings within the string.
        var replacementOffsets: [(index: String.Index, offset: String.Index.Distance)] = []

        // Convert the number in the string to the corresponding
        // Unicode character, e.g.
        //    decodeNumeric("64", 10)   --> "@"
        //    decodeNumeric("20ac", 16) --> "€"
        // Returns `nil` when the value is not a usable Unicode scalar.
        //
        // `strtoul` stops at the first character that is not a digit in the
        // given base, so the trailing ";" passed in by `decode` is ignored.
        func decodeNumeric(string : String, base : Int32) -> Character? {
            let code = strtoul(string, nil, base)
            // Validate BEFORE narrowing to UInt32 / building the scalar:
            //  - 0 is what `strtoul` yields when there are no digits at all
            //    ("&#;", "&#zz;"); decoding that to U+0000 would inject NUL.
            //  - values above 0x10FFFF and the surrogate range 0xD800...0xDFFF
            //    are not Unicode scalar values; converting them would trap.
            guard code > 0 && code <= 0x10FFFF && !(code >= 0xD800 && code <= 0xDFFF) else {
                return nil
            }
            return Character(UnicodeScalar(UInt32(code)))
        }

        // Decode the HTML character entity to the corresponding
        // Unicode character, return `nil` for invalid input.
        //     decode("&#64;")    --> "@"
        //     decode("&#x20ac;") --> "€"
        //     decode("&lt;")     --> "<"
        //     decode("&foo;")    --> nil
        func decode(entity : String) -> Character? {
            if entity.hasPrefix("&#x") || entity.hasPrefix("&#X"){
                return decodeNumeric(entity.substringFromIndex(entity.startIndex.advancedBy(3)), base: 16)
            } else if entity.hasPrefix("&#") {
                return decodeNumeric(entity.substringFromIndex(entity.startIndex.advancedBy(2)), base: 10)
            } else {
                return characterEntities[entity]
            }
        }

        // ===== Method starts here =====

        var result = ""
        var position = startIndex

        // Find the next '&'.
        while let ampRange = self.rangeOfString("&", range: position ..< endIndex) {
            // Find the ';' that could terminate an entity starting here.
            guard let semiRange = self.rangeOfString(";", range: ampRange.endIndex ..< endIndex) else {
                // No ';' anywhere in the rest of the string, so there are no
                // more entities; the tail is copied after the loop.
                break
            }

            // An entity never contains '&', so the candidate starts at the
            // LAST '&' before the ';'. Without this, a stray '&' would swallow
            // a following valid entity: in "AT&T &amp; Co" the candidate would
            // be "&T &amp;" and "&amp;" would never be decoded.
            // The search range starts at an '&', so a match always exists; the
            // fallback merely avoids force-unwrapping.
            let lastAmpRange = self.rangeOfString("&", options: .BackwardsSearch, range: ampRange.startIndex ..< semiRange.startIndex) ?? ampRange
            let entityStart = lastAmpRange.startIndex

            // Copy everything preceding the candidate entity verbatim.
            result += self[position ..< entityStart]

            let entity = self[entityStart ..< semiRange.endIndex]
            if let decoded = decode(entity) {
                // Replace by decoded character:
                result.append(decoded)
                // Record offset
                let offset = (index: semiRange.endIndex, offset: 1 - entityStart.distanceTo(semiRange.endIndex))
                replacementOffsets.append(offset)
            } else {
                // Invalid entity, copy verbatim:
                result += entity
            }
            position = semiRange.endIndex
        }

        // Copy remaining characters to `result`:
        result += self[position ..< endIndex]

        // Return results
        return (decodedString: result, replacementOffsets: replacementOffsets)
    }
}
以上是关于swift 在Swift中解码HTML实体的主要内容,如果未能解决你的问题,请参考以下文章
SWIFT:在解码 HTML 实体时在事务中调用 +[CATransaction synchronize]