Skip to content

Commit

Permalink
Use redirected URL in UnfurlResult
Browse files Browse the repository at this point in the history
  • Loading branch information
saket committed Mar 20, 2022
1 parent bd1d011 commit b11a15f
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 4 deletions.
1 change: 1 addition & 0 deletions cli/src/main/kotlin/me/saket/unfurl/cmd/UnfurlCommand.kt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class UnfurlCommand : CliktCommand(name = "unfurl") {
} else {
echo("")
with(unfurled) {
echo("Url: $url")
when (title) {
null -> echo("Title: null")
else -> echo("Title: \"$title\"")
Expand Down
5 changes: 5 additions & 0 deletions unfurl/src/main/kotlin/me/saket/unfurl/UnfurlResult.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ package me.saket.unfurl

import okhttp3.HttpUrl

/**
* @param url May differ from the original URL passed to [Unfurler.unfurl] when
* HTTP 3xx redirects are followed. For example, `https://youtu.be/foo` will
* redirect to `https://www.youtube.com/watch?v=foo`.
*/
data class UnfurlResult(
val url: HttpUrl,
val title: String?,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import me.saket.unfurl.UnfurlResult
import me.saket.unfurl.delegates.UnfurlerDelegate
import me.saket.unfurl.delegates.UnfurlerDelegateScope
import okhttp3.HttpUrl
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.MediaType
import okhttp3.Request
import org.jsoup.Jsoup
Expand All @@ -15,7 +16,7 @@ class HtmlTagsBasedUnfurler(
private val parsers = parsers + DefaultHtmlMetadataParser()

override fun UnfurlerDelegateScope.unfurl(url: HttpUrl): UnfurlResult? {
return downloadHtml(url)?.extractMetadata(url)
return downloadHtml(url)?.extractMetadata()
}

private fun UnfurlerDelegateScope.downloadHtml(url: HttpUrl): Document? {
Expand All @@ -33,12 +34,14 @@ class HtmlTagsBasedUnfurler(
return try {
httpClient.newCall(request).execute().use { response ->
val body = response.body
val redirectedUrl = response.request.url

if (body != null && body.contentType().isHtmlText()) {
// TODO: stream the HTML body only until a "</head>" is received instead of streaming the entire HTML body.
Jsoup.parse(
/* in */ body.source().inputStream(),
/* charsetName */ null,
/* baseUri */ url.toString()
/* baseUri */ redirectedUrl.toString()
)
} else {
null
Expand All @@ -50,9 +53,9 @@ class HtmlTagsBasedUnfurler(
}
}

private fun Document.extractMetadata(url: HttpUrl): UnfurlResult? {
private fun Document.extractMetadata(): UnfurlResult? {
return parsers.asSequence()
.map { it.parse(url, document = this) }
.map { it.parse(url = baseUri().toHttpUrl(), document = this) }
.firstOrNull()
}

Expand Down

0 comments on commit b11a15f

Please sign in to comment.