Update 07.3.md

fix 通过正则修改内容中的代码未处理HTML转义的问题
This commit is contained in:
胡子豪
2021-03-03 23:32:05 +08:00
committed by GitHub
parent 72d959bfa7
commit be5f3db548

View File

@@ -75,23 +75,29 @@ func main() {
src := string(body)
//将HTML标签全转换成小写
re, _ := regexp.Compile("\\<[\\S\\s]+?\\>")
re, _ := regexp.Compile(`<[\S\s]+?>`)
src = re.ReplaceAllStringFunc(src, strings.ToLower)
//去除STYLE
re, _ = regexp.Compile("\\<style[\\S\\s]+?\\</style\\>")
re, _ = regexp.Compile(`<style[\S\s]+?</style>`)
src = re.ReplaceAllString(src, "")
//去除经过HTML转义(&lt;style…&lt;/style&gt;形式)的STYLE
re, _ = regexp.Compile(`&lt;style[\S\s]+?&lt;/style&gt;`)
src = re.ReplaceAllString(src, "")
//去除SCRIPT
re, _ = regexp.Compile("\\<script[\\S\\s]+?\\</script\\>")
re, _ = regexp.Compile(`<script[\S\s]+?</script>`)
src = re.ReplaceAllString(src, "")
//去除经过HTML转义(&lt;script…&lt;/script&gt;形式)的SCRIPT
re, _ = regexp.Compile(`&lt;script[\S\s]+?&lt;/script&gt;`)
src = re.ReplaceAllString(src, "")
//去除所有尖括号内的HTML代码并换成换行符
re, _ = regexp.Compile("\\<[\\S\\s]+?\\>")
re, _ = regexp.Compile(`<[\S\s]+?>`)
src = re.ReplaceAllString(src, "\n")
//将连续的空白字符(两个及以上,含换行符)替换为单个换行符
re, _ = regexp.Compile("\\s{2,}")
re, _ = regexp.Compile(`\s{2,}`)
src = re.ReplaceAllString(src, "\n")
fmt.Println(strings.TrimSpace(src))