Skip to content

Commit

Permalink
Merge pull request #14 from benbalter/gdoc-support
Browse files Browse the repository at this point in the history
Gdoc support
  • Loading branch information
benbalter committed Mar 30, 2014
2 parents db0c515 + ae4b60d commit 9ee1895
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 10 deletions.
34 changes: 24 additions & 10 deletions lib/word-to-markdown.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class WordToMarkdown
.MsoListParagraphCxSpMiddle
.MsoListParagraphCxSpLast
.MsoListParagraph
li
]

attr_reader :path, :doc
Expand Down Expand Up @@ -146,6 +147,20 @@ def li_selectors
LI_SELECTORS.join(",")
end

# Collects the distinct indent values found on every node matched by
# li_selectors, ordered from smallest to largest. The result is cached
# in @indents after the first call.
#
# Returns a sorted Array of unique indent values
def indents
  return @indents if @indents

  list_items = doc.css(li_selectors)
  @indents = list_items.map(&:indent).uniq.sort
end

# Translate a raw indent value into its nesting level
#
# level - the raw indent value of a node, e.g., 2.5 (from 2.5em)
#
# Returns the zero-based Integer position of that value within the
# sorted list returned by indents, or nil when the value is not present
def indent(level)
  indents.index(level)
end

# Try to make semantic markup explicit where implied by the export
def semanticize!

Expand All @@ -160,25 +175,24 @@ def semanticize!
list_type = "ul"
end

# calculate indent level
current_indent = indent(node.indent)

# Determine parent node for this li, creating it if necessary
if node.indent > indent_level
if current_indent > indent_level || indent_level == 0 && node.parent.css(".indent#{current_indent}").empty?
list = Nokogiri::XML::Node.new list_type, @doc
list.classes = ["list", "indent#{node.indent}"]
if node.indent == 1
list.parent = node.parent
else
list.parent = node.parent.css(".indent#{node.indent-1} li").last
end
list.classes = ["list", "indent#{current_indent}"]
list.parent = node.parent.css(".indent#{current_indent-1} li").last || node.parent
else
list = node.parent.css(".indent#{node.indent}").last
list = node.parent.css(".indent#{current_indent}").last
end

# Note our current nesting depth
indent_level = node.indent
indent_level = current_indent

# Convert list paragraphs to actual numbered and unnumbered lists
node.node_name = "li"
node.parent = list
node.parent = list if list

# Scrub unicode bullets
span = node.css("span:first")[1]
Expand Down
215 changes: 215 additions & 0 deletions test/fixtures/gdoc.htm
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
<html>

<head>
<meta content="text/html; charset=UTF-8" http-equiv="content-type">
<style type="text/css">
.lst-kix_id6kp4jljtnx-2>li: before {
content: "\0025a0 "
}
.lst-kix_id6kp4jljtnx-4>li: before {
content: "\0025cb "
}
.lst-kix_id6kp4jljtnx-3>li: before {
content: "\0025cf "
}
ul.lst-kix_id6kp4jljtnx-0 {
list-style-type: none
}
ul.lst-kix_id6kp4jljtnx-1 {
list-style-type: none
}
.lst-kix_id6kp4jljtnx-7>li: before {
content: "\0025cb "
}
ul.lst-kix_id6kp4jljtnx-2 {
list-style-type: none
}
ul.lst-kix_id6kp4jljtnx-3 {
list-style-type: none
}
.lst-kix_id6kp4jljtnx-6>li: before {
content: "\0025cf "
}
ul.lst-kix_id6kp4jljtnx-4 {
list-style-type: none
}
ul.lst-kix_id6kp4jljtnx-5 {
list-style-type: none
}
ul.lst-kix_id6kp4jljtnx-6 {
list-style-type: none
}
ul.lst-kix_id6kp4jljtnx-8 {
list-style-type: none
}
ul.lst-kix_id6kp4jljtnx-7 {
list-style-type: none
}
.lst-kix_id6kp4jljtnx-1>li: before {
content: "\0025cb "
}
.lst-kix_id6kp4jljtnx-8>li: before {
content: "\0025a0 "
}
.lst-kix_id6kp4jljtnx-0>li: before {
content: "\0025cf "
}
.lst-kix_id6kp4jljtnx-5>li: before {
content: "\0025a0 "
}
ol {
margin: 0;
padding: 0
}
.c1 {
widows: 2;
orphans: 2;
direction: ltr
}
.c4 {
max-width: 468pt;
background-color: #ffffff;
padding: 72pt 72pt 72pt 72pt
}
.c5 {
padding-left: 0pt;
margin-left: 72pt
}
.c0 {
margin: 0;
padding: 0
}
.c3 {
padding-left: 0pt;
margin-left: 36pt
}
.c2 {
height: 11pt
}
.title {
widows: 2;
padding-top: 0pt;
line-height: 1.15;
orphans: 2;
text-align: left;
color: #000000;
font-size: 21pt;
font-family: "Trebuchet MS";
padding-bottom: 0pt;
page-break-after: avoid
}
.subtitle {
widows: 2;
padding-top: 0pt;
line-height: 1.15;
orphans: 2;
text-align: left;
color: #666666;
font-style: italic;
font-size: 13pt;
font-family: "Trebuchet MS";
padding-bottom: 10pt;
page-break-after: avoid
}
li {
color: #000000;
font-size: 11pt;
font-family: "Arial"
}
p {
color: #000000;
font-size: 11pt;
margin: 0;
font-family: "Arial"
}
h1 {
widows: 2;
padding-top: 10pt;
line-height: 1.15;
orphans: 2;
text-align: left;
color: #000000;
font-size: 16pt;
font-family: "Trebuchet MS";
padding-bottom: 0pt;
page-break-after: avoid
}
h2 {
widows: 2;
padding-top: 10pt;
line-height: 1.15;
orphans: 2;
text-align: left;
color: #000000;
font-size: 13pt;
font-family: "Trebuchet MS";
font-weight: bold;
padding-bottom: 0pt;
page-break-after: avoid
}
h3 {
widows: 2;
padding-top: 8pt;
line-height: 1.15;
orphans: 2;
text-align: left;
color: #666666;
font-size: 12pt;
font-family: "Trebuchet MS";
font-weight: bold;
padding-bottom: 0pt;
page-break-after: avoid
}
h4 {
widows: 2;
padding-top: 8pt;
line-height: 1.15;
orphans: 2;
text-align: left;
color: #666666;
font-size: 11pt;
text-decoration: underline;
font-family: "Trebuchet MS";
padding-bottom: 0pt;
page-break-after: avoid
}
h5 {
widows: 2;
padding-top: 8pt;
line-height: 1.15;
orphans: 2;
text-align: left;
color: #666666;
font-size: 11pt;
font-family: "Trebuchet MS";
padding-bottom: 0pt;
page-break-after: avoid
}
h6 {
widows: 2;
padding-top: 8pt;
line-height: 1.15;
orphans: 2;
text-align: left;
color: #666666;
font-style: italic;
font-size: 11pt;
font-family: "Trebuchet MS";
padding-bottom: 0pt;
page-break-after: avoid
}
</style>
</head>

<body class="c4">
<ul class="c0 lst-kix_id6kp4jljtnx-0 start">
<li class="c1 c3"><span>Bullet point</span>
</li>
</ul>
<ul class="c0 lst-kix_id6kp4jljtnx-1 start">
<li class="c1 c5"><span>Indented bullet point</span>
</li>
</ul>
</body>

</html>
4 changes: 4 additions & 0 deletions test/test_word_to_markdown_lists.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ class TestWordToMarkdownLists < Test::Unit::TestCase
validate_fixture "nested-ul", "- One\n - Sub one\n - Sub sub one\n - Sub sub two\n\n - Sub two\n\n- Two"
end

# The gdoc fixture holds one top-level bullet followed by one indented
# bullet; both should come out as a nested markdown list.
should "parse gdoc nested uls" do
  expected_markdown = "- Bullet point\n\n - Indented bullet point"
  validate_fixture "gdoc", expected_markdown
end

should "parse left margin" do
doc = WordToMarkdown.new "<p style='margin-left: 25px'>foo</p>"
assert_equal 25, doc.doc.css("p").first.left_margin
Expand Down

0 comments on commit 9ee1895

Please sign in to comment.