// Run some more rules that care about quotes and whitespace.
rules = [
- // Remove MSO-blah, MSO:blah in style attributes. Only removes one or more that appear in succession.
- {regex: /(<[^>]*?style\s*?=\s*?"[^>"]*?)(?:[\s]*MSO[-:][^>;"]*;?)+/gi, replace: "$1"},
- // Remove MSO classes in class attributes. Only removes one or more that appear in succession.
- {regex: /(<[^>]*?class\s*?=\s*?"[^>"]*?)(?:[\s]*MSO[_a-zA-Z0-9\-]*)+/gi, replace: "$1"},
- // Remove Apple- classes in class attributes. Only removes one or more that appear in succession.
- {regex: /(<[^>]*?class\s*?=\s*?"[^>"]*?)(?:[\s]*Apple-[_a-zA-Z0-9\-]*)+/gi, replace: "$1"},
+ // Match each double-quoted style attribute inside a tag: group1 is the tag text up to and including the opening quote, group2 is the attribute value, group3 is the closing quote.
+ {regex: /(<[^>]*?style\s*?=\s*?")([^>"]*)(")/gi, replace: function(match, group1, group2, group3) {
+ // Strip every MSO-blah / MSO:blah declaration (case-insensitive) found at the start of the value or after a ';', taking that ';' with it; an entity such as &quot; inside the declaration is consumed whole so its ';' does not end the match.
+ group2 = group2.replace(/(?:^|;)[\s]*MSO[-:](?:&[\w]*;|[^;"])*/gi,"");
+ return group1 + group2 + group3; // Reassemble the attribute around the cleaned value.
+ }},
+ // Match each double-quoted class attribute inside a tag: group1 is the tag text up to and including the opening quote, group2 is the class list, group3 is the closing quote.
+ {regex: /(<[^>]*?class\s*?=\s*?")([^>"]*)(")/gi, replace: function(match, group1, group2, group3) {
+ // Strip every class name that begins with MSO (case-insensitive), together with the whitespace in front of it, at the start of the list or after whitespace.
+ group2 = group2.replace(/(?:^|[\s])[\s]*MSO[_a-zA-Z0-9\-]*/gi,"");
+ // Strip every class name that begins with Apple- (case-insensitive), together with the whitespace in front of it, at the start of the list or after whitespace.
+ group2 = group2.replace(/(?:^|[\s])[\s]*Apple-[_a-zA-Z0-9\-]*/gi,"");
+ return group1 + group2 + group3; // Reassemble the attribute around the cleaned class list.
+ }},
// Remove OLE_LINK# anchors that may litter the code.
- {regex: /<a [^>]*?name\s*?=\s*?"OLE_LINK\d*?"[^>]*?>\s*?<\/a>/gi, replace: ""},
- // Remove empty spans, but not ones from Rangy.
- {regex: /<span(?![^>]*?rangySelectionBoundary[^>]*?)[^>]*>( |\s)*<\/span>/gi, replace: ""}
+ {regex: /<a [^>]*?name\s*?=\s*?"OLE_LINK\d*?"[^>]*?>\s*?<\/a>/gi, replace: ""}
];
// Apply the rules.