=htc Fix handling of non-BMP characters in URI encoding/decoding, closes #16014
Also improved the tests for this as they were sub-par so far
This commit is contained in:
parent
c272d9afa5
commit
abf4091bb7
3 changed files with 62 additions and 37 deletions
|
|
@ -5,17 +5,17 @@
|
|||
package akka.http.model
|
||||
|
||||
import language.implicitConversions
|
||||
import java.net.{ Inet4Address, Inet6Address, InetAddress }
|
||||
import java.lang.{ StringBuilder ⇒ JStringBuilder, Iterable }
|
||||
import java.nio.charset.Charset
|
||||
import scala.annotation.tailrec
|
||||
import scala.collection.{ immutable, mutable, LinearSeqOptimized }
|
||||
import scala.collection.immutable.LinearSeq
|
||||
import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput, UTF8 }
|
||||
import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput }
|
||||
import akka.http.model.parser.UriParser
|
||||
import akka.http.model.parser.CharacterClasses._
|
||||
import akka.http.util._
|
||||
import Uri._
|
||||
import java.net.{ Inet4Address, Inet6Address, InetAddress }
|
||||
|
||||
/**
|
||||
* An immutable model of an internet URI as defined by http://tools.ietf.org/html/rfc3986.
|
||||
|
|
@ -657,13 +657,12 @@ object Uri {
|
|||
decodeBytes(i + 1, oredBytes | byte)
|
||||
} else oredBytes
|
||||
|
||||
if ((decodeBytes() >> 7) != 0) { // if non-ASCII chars are present we need to involve the charset for decoding
|
||||
sb.append(new String(bytes, charset))
|
||||
} else {
|
||||
// if we have only ASCII chars and the charset is ASCII compatible we don't need to involve it in decoding
|
||||
if (((decodeBytes() >> 7) == 0) && UriRendering.isAsciiCompatible(charset)) {
|
||||
@tailrec def appendBytes(i: Int = 0): Unit =
|
||||
if (i < bytesCount) { sb.append(bytes(i).toChar); appendBytes(i + 1) }
|
||||
appendBytes()
|
||||
}
|
||||
} else sb.append(new String(bytes, charset))
|
||||
decode(string, charset, lastPercentSignIndexPlus3)(sb)
|
||||
|
||||
case x ⇒ decode(string, charset, ix + 1)(sb.append(x))
|
||||
|
|
@ -750,7 +749,7 @@ object UriRendering {
|
|||
def render[R <: Rendering](r: R, value: Authority): r.type = renderAuthority(r, value, "", UTF8)
|
||||
}
|
||||
implicit object PathRenderer extends Renderer[Path] {
|
||||
def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8, encodeFirstSegmentColons = false)
|
||||
def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8)
|
||||
}
|
||||
implicit object QueryRenderer extends Renderer[Query] {
|
||||
def render[R <: Rendering](r: R, value: Query): r.type = renderQuery(r, value, UTF8)
|
||||
|
|
@ -801,13 +800,14 @@ object UriRendering {
|
|||
else r
|
||||
}
|
||||
|
||||
def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean): r.type = path match {
|
||||
case Path.Empty ⇒ r
|
||||
case Path.Slash(tail) ⇒ renderPath(r ~~ '/', tail, charset, encodeFirstSegmentColons = false)
|
||||
case Path.Segment(head, tail) ⇒
|
||||
val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base`
|
||||
renderPath(encode(r, head, charset, keep), tail, charset, encodeFirstSegmentColons = false)
|
||||
}
|
||||
def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean = false): r.type =
|
||||
path match {
|
||||
case Path.Empty ⇒ r
|
||||
case Path.Slash(tail) ⇒ renderPath(r ~~ '/', tail, charset)
|
||||
case Path.Segment(head, tail) ⇒
|
||||
val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base`
|
||||
renderPath(encode(r, head, charset, keep), tail, charset)
|
||||
}
|
||||
|
||||
def renderQuery[R <: Rendering](r: R, query: Query, charset: Charset): r.type = {
|
||||
def enc(s: String): Unit = encode(r, s, charset, `strict-query-char-np`, replaceSpaces = true)
|
||||
|
|
@ -827,18 +827,24 @@ object UriRendering {
|
|||
|
||||
private[http] def encode(r: Rendering, string: String, charset: Charset, keep: CharPredicate,
|
||||
replaceSpaces: Boolean = false): r.type = {
|
||||
@tailrec def rec(ix: Int = 0): r.type = {
|
||||
val asciiCompatible = isAsciiCompatible(charset)
|
||||
@tailrec def rec(ix: Int): r.type = {
|
||||
def appendEncoded(byte: Byte): Unit = r ~~ '%' ~~ CharUtils.upperHexDigit(byte >>> 4) ~~ CharUtils.upperHexDigit(byte)
|
||||
if (ix < string.length) {
|
||||
string.charAt(ix) match {
|
||||
case c if keep(c) ⇒ r ~~ c
|
||||
case ' ' if replaceSpaces ⇒ r ~~ '+'
|
||||
case c if c <= 127 ⇒ appendEncoded(c.toByte)
|
||||
case c ⇒ c.toString.getBytes(charset).foreach(appendEncoded)
|
||||
val charSize = string.charAt(ix) match {
|
||||
case c if keep(c) ⇒ { r ~~ c; 1 }
|
||||
case ' ' if replaceSpaces ⇒ { r ~~ '+'; 1 }
|
||||
case c if c <= 127 && asciiCompatible ⇒ { appendEncoded(c.toByte); 1 }
|
||||
case c ⇒
|
||||
def append(s: String) = s.getBytes(charset).foreach(appendEncoded)
|
||||
if (Character.isHighSurrogate(c)) { append(new String(Array(string codePointAt ix), 0, 1)); 2 }
|
||||
else { append(c.toString); 1 }
|
||||
}
|
||||
rec(ix + 1)
|
||||
rec(ix + charSize)
|
||||
} else r
|
||||
}
|
||||
rec()
|
||||
rec(0)
|
||||
}
|
||||
|
||||
private[http] def isAsciiCompatible(cs: Charset) = cs == UTF8 || cs == ISO88591 || cs == ASCII
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,9 @@ import akka.stream.{ Transformer, FlattenStrategy, FlowMaterializer }
|
|||
|
||||
package object util {
|
||||
private[http] val UTF8 = Charset.forName("UTF8")
|
||||
private[http] val ASCII = Charset.forName("ASCII")
|
||||
private[http] val ISO88591 = Charset.forName("ISO-8859-1")
|
||||
|
||||
private[http] val EmptyByteArray = Array.empty[Byte]
|
||||
|
||||
private[http] def actorSystem(implicit refFactory: ActorRefFactory): ExtendedActorSystem =
|
||||
|
|
|
|||
|
|
@ -4,10 +4,13 @@
|
|||
|
||||
package akka.http.model
|
||||
|
||||
import java.nio.charset.Charset
|
||||
import java.net.InetAddress
|
||||
import akka.http.util.StringRendering
|
||||
import org.scalatest.matchers.{ MatchResult, Matcher }
|
||||
import org.scalatest.{ Matchers, WordSpec }
|
||||
import akka.parboiled2.UTF8
|
||||
import Uri._
|
||||
import java.net.InetAddress
|
||||
|
||||
class UriSpec extends WordSpec with Matchers {
|
||||
|
||||
|
|
@ -188,20 +191,33 @@ class UriSpec extends WordSpec with Matchers {
|
|||
"Uri.Path instances" should {
|
||||
import Path.Empty
|
||||
"be parsed and rendered correctly" in {
|
||||
Path("") shouldEqual Empty
|
||||
Path("/") shouldEqual Path./
|
||||
Path("a") shouldEqual "a" :: Empty
|
||||
Path("//") shouldEqual Path./ / ""
|
||||
Path("a/") shouldEqual "a" :: Path./
|
||||
Path("/a") shouldEqual Path / "a"
|
||||
Path("/abc/de/f") shouldEqual Path / "abc" / "de" / "f"
|
||||
Path("abc/de/f/") shouldEqual "abc" :: '/' :: "de" :: '/' :: "f" :: Path./
|
||||
Path("abc///de") shouldEqual "abc" :: '/' :: '/' :: '/' :: "de" :: Empty
|
||||
Path("/abc%2F") shouldEqual Path / "abc/"
|
||||
Path("H%C3%A4ll%C3%B6") shouldEqual """Hällö""" :: Empty
|
||||
Path("/%2F%5C") shouldEqual Path / """/\"""
|
||||
Path("/:foo:/") shouldEqual Path / ":foo:" / ""
|
||||
Path("%2520").head shouldEqual "%20"
|
||||
def roundTripTo(p: Path, cs: Charset = UTF8) =
|
||||
Matcher[String] { s ⇒
|
||||
val rendering = UriRendering.renderPath(new StringRendering, p, cs).get
|
||||
if (rendering != s) MatchResult(false, s"The path rendered to '$rendering' rather than '$s'", "<?>")
|
||||
else if (Path(s, cs) != p) MatchResult(false, s"The string parsed to '${Path(s, cs)}' rather than '$p'", "<?>")
|
||||
else MatchResult(true, "<?>", "<?>")
|
||||
}
|
||||
|
||||
"" should roundTripTo(Empty)
|
||||
"/" should roundTripTo(Path./)
|
||||
"a" should roundTripTo("a" :: Empty)
|
||||
"//" should roundTripTo(Path./ / "")
|
||||
"a/" should roundTripTo("a" :: Path./)
|
||||
"/a" should roundTripTo(Path / "a")
|
||||
"/abc/de/f" should roundTripTo(Path / "abc" / "de" / "f")
|
||||
"abc/de/f/" should roundTripTo("abc" :: '/' :: "de" :: '/' :: "f" :: Path./)
|
||||
"abc///de" should roundTripTo("abc" :: '/' :: '/' :: '/' :: "de" :: Empty)
|
||||
"/abc%2F" should roundTripTo(Path / "abc/")
|
||||
"/:foo:/" should roundTripTo(Path / ":foo:" / "")
|
||||
"/%2520" should roundTripTo(Path / "%20")
|
||||
"/foo%20bar" should roundTripTo(Path / "foo bar")
|
||||
"H%C3%A4ll%C3%B6" should roundTripTo("Hällö" :: Empty)
|
||||
"/%2F%5C" should roundTripTo(Path / """/\""")
|
||||
"/foo%F0%9F%92%A9bar" should roundTripTo(Path / "foo\ud83d\udca9bar")
|
||||
"/%C3%89g%20get%20eti%C3%B0%20gler%20%C3%A1n%20%C3%BEess%20a%C3%B0%20mei%C3%B0a%20mig" should
|
||||
roundTripTo(Path / "Ég get etið gler án þess að meiða mig")
|
||||
"/%00%E4%00%F6%00%FC" should roundTripTo(Path / "äöü", Charset.forName("UTF-16BE"))
|
||||
}
|
||||
"support the `startsWith` predicate" in {
|
||||
Empty startsWith Empty shouldBe true
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue