diff --git a/Project.toml b/Project.toml index 8b8e14b..4da2d8c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "WordCloud" uuid = "6385f0a0-cb03-45b6-9089-4e0acc74b26b" authors = ["guoyongzhi "] -version = "1.1.0" +version = "1.1.1" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" diff --git a/src/textprocessing.jl b/src/textprocessing.jl index 52abf5e..e27c788 100644 --- a/src/textprocessing.jl +++ b/src/textprocessing.jl @@ -294,9 +294,10 @@ function processtext(counter::AbstractVector{<:Union{Pair,Tuple,AbstractVector}} end function html2text(content::AbstractString) patterns = [ - r"\"[\s\S]*?\"" => " ", r"<[\s]*?script[^>]*?>[\s\S]*?<[\s]*?/[\s]*?script[\s]*?>" => " ", r"<[\s]*?style[^>]*?>[\s\S]*?<[\s]*?/[\s]*?style[\s]*?>" => " ", + r"" => " ", + r"<[\s\S]*?=\s*?\"[\s\S]*?\"\s*?>" => " ", "
" => "\n", r"<[\s\S]*?>" => " ", ] diff --git a/test/test_textprocessing.jl b/test/test_textprocessing.jl index 263d694..3c6ae03 100644 --- a/test/test_textprocessing.jl +++ b/test/test_textprocessing.jl @@ -78,4 +78,6 @@ htstr = """£abcd
""" @test strip(html2text(htstr)) == "abcd" + htstr= """"something." """ + @test replace(html2text(htstr), r"\s"=>"") == "\"something.\"" end \ No newline at end of file