=htc Fix handling of non-BMP characters in URI encoding/decoding, closes #16014
Also improved the tests for this, as they were sub-par so far.
This commit is contained in:
parent
c272d9afa5
commit
abf4091bb7
3 changed files with 62 additions and 37 deletions
|
|
@ -5,17 +5,17 @@
|
||||||
package akka.http.model
|
package akka.http.model
|
||||||
|
|
||||||
import language.implicitConversions
|
import language.implicitConversions
|
||||||
|
import java.net.{ Inet4Address, Inet6Address, InetAddress }
|
||||||
import java.lang.{ StringBuilder ⇒ JStringBuilder, Iterable }
|
import java.lang.{ StringBuilder ⇒ JStringBuilder, Iterable }
|
||||||
import java.nio.charset.Charset
|
import java.nio.charset.Charset
|
||||||
import scala.annotation.tailrec
|
import scala.annotation.tailrec
|
||||||
import scala.collection.{ immutable, mutable, LinearSeqOptimized }
|
import scala.collection.{ immutable, mutable, LinearSeqOptimized }
|
||||||
import scala.collection.immutable.LinearSeq
|
import scala.collection.immutable.LinearSeq
|
||||||
import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput, UTF8 }
|
import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput }
|
||||||
import akka.http.model.parser.UriParser
|
import akka.http.model.parser.UriParser
|
||||||
import akka.http.model.parser.CharacterClasses._
|
import akka.http.model.parser.CharacterClasses._
|
||||||
import akka.http.util._
|
import akka.http.util._
|
||||||
import Uri._
|
import Uri._
|
||||||
import java.net.{ Inet4Address, Inet6Address, InetAddress }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An immutable model of an internet URI as defined by http://tools.ietf.org/html/rfc3986.
|
* An immutable model of an internet URI as defined by http://tools.ietf.org/html/rfc3986.
|
||||||
|
|
@ -657,13 +657,12 @@ object Uri {
|
||||||
decodeBytes(i + 1, oredBytes | byte)
|
decodeBytes(i + 1, oredBytes | byte)
|
||||||
} else oredBytes
|
} else oredBytes
|
||||||
|
|
||||||
if ((decodeBytes() >> 7) != 0) { // if non-ASCII chars are present we need to involve the charset for decoding
|
// if we have only ASCII chars and the charset is ASCII compatible we don't need to involve it in decoding
|
||||||
sb.append(new String(bytes, charset))
|
if (((decodeBytes() >> 7) == 0) && UriRendering.isAsciiCompatible(charset)) {
|
||||||
} else {
|
|
||||||
@tailrec def appendBytes(i: Int = 0): Unit =
|
@tailrec def appendBytes(i: Int = 0): Unit =
|
||||||
if (i < bytesCount) { sb.append(bytes(i).toChar); appendBytes(i + 1) }
|
if (i < bytesCount) { sb.append(bytes(i).toChar); appendBytes(i + 1) }
|
||||||
appendBytes()
|
appendBytes()
|
||||||
}
|
} else sb.append(new String(bytes, charset))
|
||||||
decode(string, charset, lastPercentSignIndexPlus3)(sb)
|
decode(string, charset, lastPercentSignIndexPlus3)(sb)
|
||||||
|
|
||||||
case x ⇒ decode(string, charset, ix + 1)(sb.append(x))
|
case x ⇒ decode(string, charset, ix + 1)(sb.append(x))
|
||||||
|
|
@ -750,7 +749,7 @@ object UriRendering {
|
||||||
def render[R <: Rendering](r: R, value: Authority): r.type = renderAuthority(r, value, "", UTF8)
|
def render[R <: Rendering](r: R, value: Authority): r.type = renderAuthority(r, value, "", UTF8)
|
||||||
}
|
}
|
||||||
implicit object PathRenderer extends Renderer[Path] {
|
implicit object PathRenderer extends Renderer[Path] {
|
||||||
def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8, encodeFirstSegmentColons = false)
|
def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8)
|
||||||
}
|
}
|
||||||
implicit object QueryRenderer extends Renderer[Query] {
|
implicit object QueryRenderer extends Renderer[Query] {
|
||||||
def render[R <: Rendering](r: R, value: Query): r.type = renderQuery(r, value, UTF8)
|
def render[R <: Rendering](r: R, value: Query): r.type = renderQuery(r, value, UTF8)
|
||||||
|
|
@ -801,13 +800,14 @@ object UriRendering {
|
||||||
else r
|
else r
|
||||||
}
|
}
|
||||||
|
|
||||||
def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean): r.type = path match {
|
def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean = false): r.type =
|
||||||
case Path.Empty ⇒ r
|
path match {
|
||||||
case Path.Slash(tail) ⇒ renderPath(r ~~ '/', tail, charset, encodeFirstSegmentColons = false)
|
case Path.Empty ⇒ r
|
||||||
case Path.Segment(head, tail) ⇒
|
case Path.Slash(tail) ⇒ renderPath(r ~~ '/', tail, charset)
|
||||||
val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base`
|
case Path.Segment(head, tail) ⇒
|
||||||
renderPath(encode(r, head, charset, keep), tail, charset, encodeFirstSegmentColons = false)
|
val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base`
|
||||||
}
|
renderPath(encode(r, head, charset, keep), tail, charset)
|
||||||
|
}
|
||||||
|
|
||||||
def renderQuery[R <: Rendering](r: R, query: Query, charset: Charset): r.type = {
|
def renderQuery[R <: Rendering](r: R, query: Query, charset: Charset): r.type = {
|
||||||
def enc(s: String): Unit = encode(r, s, charset, `strict-query-char-np`, replaceSpaces = true)
|
def enc(s: String): Unit = encode(r, s, charset, `strict-query-char-np`, replaceSpaces = true)
|
||||||
|
|
@ -827,18 +827,24 @@ object UriRendering {
|
||||||
|
|
||||||
private[http] def encode(r: Rendering, string: String, charset: Charset, keep: CharPredicate,
|
private[http] def encode(r: Rendering, string: String, charset: Charset, keep: CharPredicate,
|
||||||
replaceSpaces: Boolean = false): r.type = {
|
replaceSpaces: Boolean = false): r.type = {
|
||||||
@tailrec def rec(ix: Int = 0): r.type = {
|
val asciiCompatible = isAsciiCompatible(charset)
|
||||||
|
@tailrec def rec(ix: Int): r.type = {
|
||||||
def appendEncoded(byte: Byte): Unit = r ~~ '%' ~~ CharUtils.upperHexDigit(byte >>> 4) ~~ CharUtils.upperHexDigit(byte)
|
def appendEncoded(byte: Byte): Unit = r ~~ '%' ~~ CharUtils.upperHexDigit(byte >>> 4) ~~ CharUtils.upperHexDigit(byte)
|
||||||
if (ix < string.length) {
|
if (ix < string.length) {
|
||||||
string.charAt(ix) match {
|
val charSize = string.charAt(ix) match {
|
||||||
case c if keep(c) ⇒ r ~~ c
|
case c if keep(c) ⇒ { r ~~ c; 1 }
|
||||||
case ' ' if replaceSpaces ⇒ r ~~ '+'
|
case ' ' if replaceSpaces ⇒ { r ~~ '+'; 1 }
|
||||||
case c if c <= 127 ⇒ appendEncoded(c.toByte)
|
case c if c <= 127 && asciiCompatible ⇒ { appendEncoded(c.toByte); 1 }
|
||||||
case c ⇒ c.toString.getBytes(charset).foreach(appendEncoded)
|
case c ⇒
|
||||||
|
def append(s: String) = s.getBytes(charset).foreach(appendEncoded)
|
||||||
|
if (Character.isHighSurrogate(c)) { append(new String(Array(string codePointAt ix), 0, 1)); 2 }
|
||||||
|
else { append(c.toString); 1 }
|
||||||
}
|
}
|
||||||
rec(ix + 1)
|
rec(ix + charSize)
|
||||||
} else r
|
} else r
|
||||||
}
|
}
|
||||||
rec()
|
rec(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private[http] def isAsciiCompatible(cs: Charset) = cs == UTF8 || cs == ISO88591 || cs == ASCII
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,9 @@ import akka.stream.{ Transformer, FlattenStrategy, FlowMaterializer }
|
||||||
|
|
||||||
package object util {
|
package object util {
|
||||||
private[http] val UTF8 = Charset.forName("UTF8")
|
private[http] val UTF8 = Charset.forName("UTF8")
|
||||||
|
private[http] val ASCII = Charset.forName("ASCII")
|
||||||
|
private[http] val ISO88591 = Charset.forName("ISO-8859-1")
|
||||||
|
|
||||||
private[http] val EmptyByteArray = Array.empty[Byte]
|
private[http] val EmptyByteArray = Array.empty[Byte]
|
||||||
|
|
||||||
private[http] def actorSystem(implicit refFactory: ActorRefFactory): ExtendedActorSystem =
|
private[http] def actorSystem(implicit refFactory: ActorRefFactory): ExtendedActorSystem =
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,13 @@
|
||||||
|
|
||||||
package akka.http.model
|
package akka.http.model
|
||||||
|
|
||||||
|
import java.nio.charset.Charset
|
||||||
|
import java.net.InetAddress
|
||||||
|
import akka.http.util.StringRendering
|
||||||
|
import org.scalatest.matchers.{ MatchResult, Matcher }
|
||||||
import org.scalatest.{ Matchers, WordSpec }
|
import org.scalatest.{ Matchers, WordSpec }
|
||||||
import akka.parboiled2.UTF8
|
import akka.parboiled2.UTF8
|
||||||
import Uri._
|
import Uri._
|
||||||
import java.net.InetAddress
|
|
||||||
|
|
||||||
class UriSpec extends WordSpec with Matchers {
|
class UriSpec extends WordSpec with Matchers {
|
||||||
|
|
||||||
|
|
@ -188,20 +191,33 @@ class UriSpec extends WordSpec with Matchers {
|
||||||
"Uri.Path instances" should {
|
"Uri.Path instances" should {
|
||||||
import Path.Empty
|
import Path.Empty
|
||||||
"be parsed and rendered correctly" in {
|
"be parsed and rendered correctly" in {
|
||||||
Path("") shouldEqual Empty
|
def roundTripTo(p: Path, cs: Charset = UTF8) =
|
||||||
Path("/") shouldEqual Path./
|
Matcher[String] { s ⇒
|
||||||
Path("a") shouldEqual "a" :: Empty
|
val rendering = UriRendering.renderPath(new StringRendering, p, cs).get
|
||||||
Path("//") shouldEqual Path./ / ""
|
if (rendering != s) MatchResult(false, s"The path rendered to '$rendering' rather than '$s'", "<?>")
|
||||||
Path("a/") shouldEqual "a" :: Path./
|
else if (Path(s, cs) != p) MatchResult(false, s"The string parsed to '${Path(s, cs)}' rather than '$p'", "<?>")
|
||||||
Path("/a") shouldEqual Path / "a"
|
else MatchResult(true, "<?>", "<?>")
|
||||||
Path("/abc/de/f") shouldEqual Path / "abc" / "de" / "f"
|
}
|
||||||
Path("abc/de/f/") shouldEqual "abc" :: '/' :: "de" :: '/' :: "f" :: Path./
|
|
||||||
Path("abc///de") shouldEqual "abc" :: '/' :: '/' :: '/' :: "de" :: Empty
|
"" should roundTripTo(Empty)
|
||||||
Path("/abc%2F") shouldEqual Path / "abc/"
|
"/" should roundTripTo(Path./)
|
||||||
Path("H%C3%A4ll%C3%B6") shouldEqual """Hällö""" :: Empty
|
"a" should roundTripTo("a" :: Empty)
|
||||||
Path("/%2F%5C") shouldEqual Path / """/\"""
|
"//" should roundTripTo(Path./ / "")
|
||||||
Path("/:foo:/") shouldEqual Path / ":foo:" / ""
|
"a/" should roundTripTo("a" :: Path./)
|
||||||
Path("%2520").head shouldEqual "%20"
|
"/a" should roundTripTo(Path / "a")
|
||||||
|
"/abc/de/f" should roundTripTo(Path / "abc" / "de" / "f")
|
||||||
|
"abc/de/f/" should roundTripTo("abc" :: '/' :: "de" :: '/' :: "f" :: Path./)
|
||||||
|
"abc///de" should roundTripTo("abc" :: '/' :: '/' :: '/' :: "de" :: Empty)
|
||||||
|
"/abc%2F" should roundTripTo(Path / "abc/")
|
||||||
|
"/:foo:/" should roundTripTo(Path / ":foo:" / "")
|
||||||
|
"/%2520" should roundTripTo(Path / "%20")
|
||||||
|
"/foo%20bar" should roundTripTo(Path / "foo bar")
|
||||||
|
"H%C3%A4ll%C3%B6" should roundTripTo("Hällö" :: Empty)
|
||||||
|
"/%2F%5C" should roundTripTo(Path / """/\""")
|
||||||
|
"/foo%F0%9F%92%A9bar" should roundTripTo(Path / "foo\ud83d\udca9bar")
|
||||||
|
"/%C3%89g%20get%20eti%C3%B0%20gler%20%C3%A1n%20%C3%BEess%20a%C3%B0%20mei%C3%B0a%20mig" should
|
||||||
|
roundTripTo(Path / "Ég get etið gler án þess að meiða mig")
|
||||||
|
"/%00%E4%00%F6%00%FC" should roundTripTo(Path / "äöü", Charset.forName("UTF-16BE"))
|
||||||
}
|
}
|
||||||
"support the `startsWith` predicate" in {
|
"support the `startsWith` predicate" in {
|
||||||
Empty startsWith Empty shouldBe true
|
Empty startsWith Empty shouldBe true
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue