Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for Pointer newsletter #5

Merged
merged 5 commits into from
Feb 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/main/kotlin/fr/nicopico/n2rss/Config.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import fr.nicopico.n2rss.mail.client.JavaxEmailClient
import fr.nicopico.n2rss.mail.client.ResourceFileEmailClient
import fr.nicopico.n2rss.mail.newsletter.AndroidWeeklyNewsletterHandler
import fr.nicopico.n2rss.mail.newsletter.NewsletterHandler
import fr.nicopico.n2rss.mail.newsletter.PointerNewsletterHandler
import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
import org.springframework.context.annotation.Profile
Expand Down Expand Up @@ -33,6 +34,7 @@ class Config {

@Bean
fun emailProcessors(): List<NewsletterHandler> = listOf(
AndroidWeeklyNewsletterHandler()
AndroidWeeklyNewsletterHandler(),
PointerNewsletterHandler(),
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class AndroidWeeklyNewsletterHandler : NewsletterHandler {

override val newsletter: Newsletter = Newsletter(
name = "Android Weekly",
websiteUrl = "https://androidweekly.net"
websiteUrl = "https://androidweekly.net",
)

override fun canHandle(email: Email): Boolean {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package fr.nicopico.n2rss.mail.newsletter

import fr.nicopico.n2rss.models.Article
import fr.nicopico.n2rss.models.Email
import fr.nicopico.n2rss.models.Newsletter
import fr.nicopico.n2rss.utils.toURL
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.safety.Safelist

/**
 * [NewsletterHandler] for the "Pointer" newsletter.
 *
 * The email body is sanitized with jsoup and split at the first separator, i.e. the first
 * element whose `style` attribute matches `border-top: 2px solid #000000`.
 * Content located before that separator yields an optional sponsor entry; content starting
 * at the separator yields the regular articles.
 */
class PointerNewsletterHandler : NewsletterHandler {
    override val newsletter: Newsletter = Newsletter(
        name = "Pointer",
        websiteUrl = "http://www.pointer.io/",
    )

    /**
     * Tells whether [email] was sent by the Pointer newsletter, based on the sender address.
     */
    override fun canHandle(email: Email): Boolean {
        // NOTE(review): this literal looks like an e-mail obfuscation placeholder rather than
        // a real address — confirm the expected sender against the repository.
        return email.sender.email.contains("[email protected]")
    }

    /**
     * Extracts the sponsor entry (if any) followed by every article found in [email].
     *
     * @throws NoSuchElementException when the email content has no separator
     */
    override fun extractArticles(email: Email): List<Article> {
        val cleanedHtml = Jsoup.clean(
            email.content.preserveSeparators(),
            Safelist.basic()
                .addAttributes("p", "style"),
        )
        val document = Jsoup.parseBodyFragment(cleanedHtml)

        // Separators have been changed from <td> tags to <p> tags
        // by the `String.preserveSeparators()` extension method
        val firstSeparator: Element = document.select("p[style]")
            .firstOrNull { it.isSeparator }
            ?: throw NoSuchElementException("No separator found in the Pointer email content")

        // Keep this call order: both helpers hand nodes to `appendChildren`, which re-parents
        // them, and `findArticles` moves the separator itself away from its original parent.
        val sponsor = findSponsor(firstSeparator)
        val articles = findArticles(firstSeparator)

        return listOfNotNull(sponsor) + articles
    }

    /**
     * Builds the sponsor [Article] from the nodes located before [firstSeparator].
     *
     * @return the sponsor entry, or `null` when no sponsor link with a valid URL is found
     */
    private fun findSponsor(firstSeparator: Element): Article? {
        val sponsorSection = Document("")
            .apply {
                appendChildren(
                    (firstSeparator.parent()?.childNodes() ?: emptyList())
                        .takeWhile { it != firstSeparator }
                )
            }

        val sponsorSubtitleElement = sponsorSection.selectFirst(TITLE_LINK_SELECTOR)
            ?: return null
        val sponsorLink = sponsorSubtitleElement.attr("href").toURL()
            ?: return null

        // The sponsor name follows "is presented by" in the first non-empty paragraph.
        // Fall back to "?" when the marker is missing instead of slicing at a bogus index.
        val sponsorName = sponsorSection.select("p")
            .map { it.text() }
            .firstOrNull { it.isNotEmpty() }
            ?.takeIf { SPONSOR_MARKER in it }
            ?.substringAfter(SPONSOR_MARKER)
            ?.trim()
            ?: "?"

        val sponsorSubtitle = sponsorSubtitleElement.text()
        // Everything after the subtitle is the description; if the subtitle cannot be
        // located in the section text, the whole section text is used.
        val sponsorDescription = sponsorSection.text()
            .substringAfter(sponsorSubtitle)
            .trim()

        return Article(
            title = "SPONSOR - $sponsorName: $sponsorSubtitle",
            link = sponsorLink,
            description = sponsorDescription,
        )
    }

    /**
     * Builds one [Article] per title link found from [firstSeparator] onwards.
     * Links whose `href` cannot be converted to a URL are skipped.
     */
    private fun findArticles(firstSeparator: Element): List<Article> {
        // Take articles after the first separator to ignore the sponsor
        val articleSectionDocument = Document("")
            .apply {
                appendChildren(
                    (firstSeparator.parent()?.childNodes() ?: emptyList())
                        .dropWhile { it != firstSeparator }
                )
            }

        return articleSectionDocument.select(TITLE_LINK_SELECTOR)
            .mapNotNull { articleTitle ->
                val link = articleTitle.attr("href").toURL()
                    ?: return@mapNotNull null

                Article(
                    title = articleTitle.text(),
                    link = link,
                    description = articleTitle.findDescription(),
                )
            }
    }

    /**
     * Looks for the first "tl;dr" paragraph among the elements following this element's
     * parent and returns its text without the leading "tl;dr:" label.
     *
     * @return the description, or "N/A" when no such paragraph exists
     */
    private fun Element.findDescription(): String {
        val descriptionElement = parent()
            ?.nextElementSiblings()
            ?.select("p:has(strong:contains(tl;dr))")
            ?.first()
        val text = descriptionElement?.text()
            ?: return "N/A"
        // The selector above matches the label regardless of case, so strip it the same way
        return text.replaceFirst(TLDR_PREFIX_REGEX, "").trim()
    }

    /**
     * Original separators are <td> tags with a specific style, but these tags cannot be kept
     * without the whole <table> structure. This function replaces each separator <td> with
     * a <p> tag carrying the same style and inner HTML.
     */
    private fun String.preserveSeparators(): String {
        val doc = Jsoup.parse(this)
        doc.select("td[style]")
            .filter { element -> element.isSeparator }
            .forEach { td ->
                val pElement = Element("p")
                    .attr("style", td.attr("style"))
                    .html(td.html())
                td.replaceWith(pElement)
            }
        return doc.html()
    }

    /**
     * Check if the element has the style of a separator
     * "min-width:100%;border-top:2px solid #000000"
     */
    private val Element.isSeparator: Boolean
        get() = SEPARATOR_STYLE_REGEX.containsMatchIn(attr("style"))

    private companion object {
        /** Text preceding the sponsor name in the sponsor section. */
        const val SPONSOR_MARKER = "is presented by"

        /** Selector matching the title link of the sponsor and of each article. */
        const val TITLE_LINK_SELECTOR = "a[href]:has(strong:has(span))"

        /** Style rule identifying a separator; compiled once instead of on every check. */
        val SEPARATOR_STYLE_REGEX = Regex("border-top\\s*:\\s*2px\\s*solid\\s*#000000")

        /** Leading "tl;dr:" label of a description paragraph, in any letter case. */
        val TLDR_PREFIX_REGEX = Regex("^tl;dr:", RegexOption.IGNORE_CASE)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ class AndroidWeeklyNewsletterHandlerTest {
val publication = handler.process(email)

// THEN

assertSoftly(publication) {
withClue("title") {
title shouldBe "Android Weekly #605 \uD83E\uDD16"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package fr.nicopico.n2rss.mail.newsletter

import fr.nicopico.n2rss.models.Email
import io.kotest.assertions.assertSoftly
import io.kotest.assertions.withClue
import io.kotest.matchers.kotlinx.datetime.shouldHaveSameDayAs
import io.kotest.matchers.shouldBe
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Nested
import org.junit.jupiter.api.Test
import java.net.URL

/**
 * Tests for [PointerNewsletterHandler], driven by the email fixtures loaded through
 * `loadEmails` / `loadEmail`.
 */
class PointerNewsletterHandlerTest {

    private lateinit var handler: PointerNewsletterHandler

    @BeforeEach
    fun setUp() {
        handler = PointerNewsletterHandler()
    }

    @Nested
    inner class CanHandleTest {
        @Test
        fun `should handle all emails from Pointer`() {
            // GIVEN
            val emails = loadEmails("emails/Pointer")

            // WHEN - THEN
            emails.all { handler.canHandle(it) } shouldBe true
        }

        @Test
        fun `should ignore all emails from another newsletters`() {
            // GIVEN
            val emails = loadEmails("emails/Kotlin Weekly")

            // WHEN - THEN
            // Guard against a vacuous pass when no fixture is loaded
            withClue("fixtures should not be empty") {
                emails.any() shouldBe true
            }
            // None of the foreign emails may be accepted, not merely "not all of them"
            emails.any { handler.canHandle(it) } shouldBe false
        }
    }

    @Nested
    inner class ProcessTest {
        @Test
        fun `should extract all articles from an email`() {
            // GIVEN
            val email: Email = loadEmail("emails/Pointer/Issue #480.eml")

            // WHEN
            val publication = handler.process(email)

            // THEN
            assertSoftly(publication) {
                withClue("title") {
                    title shouldBe "Issue #480"
                }
                withClue("date") {
                    date shouldHaveSameDayAs (email.date)
                }
                withClue("newsletter") {
                    newsletter.name shouldBe "Pointer"
                }
            }

            publication.articles.map { it.title } shouldBe listOf(
                "SPONSOR - Gitpod: Built For Platform Teams",
                "Incentives And The Cobra Effect",
                "Applying The SPACE Framework",
                "How To Successfully Adopt A Developer Tool",
                "The Checklist Manifesto",
                "How Apple Built iCloud To Store Billions Of Databases",
                "The Ten Commandments Of Refactoring",
                "Dynamic Programming Is Not Black Magic",
                "How Fast Is Your Shell?",
            )
        }

        @Test
        fun `should extract article details from an email`() {
            // GIVEN
            val email: Email = loadEmail("emails/Pointer/Issue #480.eml")

            // WHEN
            val publication = handler.process(email)

            // THEN
            // Index 0 is the sponsor entry, so the first regular article is at index 1
            assertSoftly(publication.articles[1]) {
                withClue("title") {
                    title shouldBe "Incentives And The Cobra Effect"
                }
                withClue("link") {
                    link shouldBe URL("https://pointer.us9.list-manage.com/track/click?u=e9492ff27d760c578a39d0675&id=d20cd3411a&e=0e436c5282")
                }
                withClue("description") {
                    description shouldBe "“Incentives are superpowers; set them carefully.” The Cobra Effect is when the solution for a problem unintentionally makes the problem worse. Andrew believe this issue is more widespread than anticipated. He provides several examples, including: everyone sharing feedback directly instead of through managers. This leads to people withholding valuable feedback to maintain relationships or damaging relationships if they can’t share negative feedback elegantly."
                }
            }
        }

        @Test
        fun `should extract sponsor details from an email`() {
            // GIVEN
            val email: Email = loadEmail("emails/Pointer/Issue #480.eml")

            // WHEN
            val publication = handler.process(email)

            // THEN
            assertSoftly(publication.articles[0]) {
                withClue("title") {
                    title shouldBe "SPONSOR - Gitpod: Built For Platform Teams"
                }
                withClue("link") {
                    link shouldBe URL("https://pointer.us9.list-manage.com/track/click?u=e9492ff27d760c578a39d0675&id=2191b13858&e=0e436c5282")
                }
                withClue("description") {
                    description shouldBe "Gitpod’s developer platform was built for developers looking to work faster and platform teams looking to work smarter. " +
                        "It allows them to do two things really well: automate standardization of development environments and always be ready-to-code. " +
                        "All it takes is adding a .gitpod.yml file to the root of any repository. " +
                        "Try Gitpod For Free"
                }
            }
        }
    }
}
Loading