=htc Fix handling of non-BMP characters in URI encoding/decoding, closes #16014

Also improved the tests for this as they were sub-par so far
This commit is contained in:
Mathias 2014-10-01 17:16:16 +02:00
parent c272d9afa5
commit abf4091bb7
3 changed files with 62 additions and 37 deletions

View file

@ -5,17 +5,17 @@
package akka.http.model package akka.http.model
import language.implicitConversions import language.implicitConversions
import java.net.{ Inet4Address, Inet6Address, InetAddress }
import java.lang.{ StringBuilder JStringBuilder, Iterable } import java.lang.{ StringBuilder JStringBuilder, Iterable }
import java.nio.charset.Charset import java.nio.charset.Charset
import scala.annotation.tailrec import scala.annotation.tailrec
import scala.collection.{ immutable, mutable, LinearSeqOptimized } import scala.collection.{ immutable, mutable, LinearSeqOptimized }
import scala.collection.immutable.LinearSeq import scala.collection.immutable.LinearSeq
import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput, UTF8 } import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput }
import akka.http.model.parser.UriParser import akka.http.model.parser.UriParser
import akka.http.model.parser.CharacterClasses._ import akka.http.model.parser.CharacterClasses._
import akka.http.util._ import akka.http.util._
import Uri._ import Uri._
import java.net.{ Inet4Address, Inet6Address, InetAddress }
/** /**
* An immutable model of an internet URI as defined by http://tools.ietf.org/html/rfc3986. * An immutable model of an internet URI as defined by http://tools.ietf.org/html/rfc3986.
@ -657,13 +657,12 @@ object Uri {
decodeBytes(i + 1, oredBytes | byte) decodeBytes(i + 1, oredBytes | byte)
} else oredBytes } else oredBytes
if ((decodeBytes() >> 7) != 0) { // if non-ASCII chars are present we need to involve the charset for decoding // if we have only ASCII chars and the charset is ASCII compatible we don't need to involve it in decoding
sb.append(new String(bytes, charset)) if (((decodeBytes() >> 7) == 0) && UriRendering.isAsciiCompatible(charset)) {
} else {
@tailrec def appendBytes(i: Int = 0): Unit = @tailrec def appendBytes(i: Int = 0): Unit =
if (i < bytesCount) { sb.append(bytes(i).toChar); appendBytes(i + 1) } if (i < bytesCount) { sb.append(bytes(i).toChar); appendBytes(i + 1) }
appendBytes() appendBytes()
} } else sb.append(new String(bytes, charset))
decode(string, charset, lastPercentSignIndexPlus3)(sb) decode(string, charset, lastPercentSignIndexPlus3)(sb)
case x decode(string, charset, ix + 1)(sb.append(x)) case x decode(string, charset, ix + 1)(sb.append(x))
@ -750,7 +749,7 @@ object UriRendering {
def render[R <: Rendering](r: R, value: Authority): r.type = renderAuthority(r, value, "", UTF8) def render[R <: Rendering](r: R, value: Authority): r.type = renderAuthority(r, value, "", UTF8)
} }
implicit object PathRenderer extends Renderer[Path] { implicit object PathRenderer extends Renderer[Path] {
def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8, encodeFirstSegmentColons = false) def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8)
} }
implicit object QueryRenderer extends Renderer[Query] { implicit object QueryRenderer extends Renderer[Query] {
def render[R <: Rendering](r: R, value: Query): r.type = renderQuery(r, value, UTF8) def render[R <: Rendering](r: R, value: Query): r.type = renderQuery(r, value, UTF8)
@ -801,13 +800,14 @@ object UriRendering {
else r else r
} }
def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean): r.type = path match { def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean = false): r.type =
case Path.Empty r path match {
case Path.Slash(tail) renderPath(r ~~ '/', tail, charset, encodeFirstSegmentColons = false) case Path.Empty r
case Path.Segment(head, tail) case Path.Slash(tail) renderPath(r ~~ '/', tail, charset)
val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base` case Path.Segment(head, tail)
renderPath(encode(r, head, charset, keep), tail, charset, encodeFirstSegmentColons = false) val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base`
} renderPath(encode(r, head, charset, keep), tail, charset)
}
def renderQuery[R <: Rendering](r: R, query: Query, charset: Charset): r.type = { def renderQuery[R <: Rendering](r: R, query: Query, charset: Charset): r.type = {
def enc(s: String): Unit = encode(r, s, charset, `strict-query-char-np`, replaceSpaces = true) def enc(s: String): Unit = encode(r, s, charset, `strict-query-char-np`, replaceSpaces = true)
@ -827,18 +827,24 @@ object UriRendering {
private[http] def encode(r: Rendering, string: String, charset: Charset, keep: CharPredicate, private[http] def encode(r: Rendering, string: String, charset: Charset, keep: CharPredicate,
replaceSpaces: Boolean = false): r.type = { replaceSpaces: Boolean = false): r.type = {
@tailrec def rec(ix: Int = 0): r.type = { val asciiCompatible = isAsciiCompatible(charset)
@tailrec def rec(ix: Int): r.type = {
def appendEncoded(byte: Byte): Unit = r ~~ '%' ~~ CharUtils.upperHexDigit(byte >>> 4) ~~ CharUtils.upperHexDigit(byte) def appendEncoded(byte: Byte): Unit = r ~~ '%' ~~ CharUtils.upperHexDigit(byte >>> 4) ~~ CharUtils.upperHexDigit(byte)
if (ix < string.length) { if (ix < string.length) {
string.charAt(ix) match { val charSize = string.charAt(ix) match {
case c if keep(c) r ~~ c case c if keep(c) { r ~~ c; 1 }
case ' ' if replaceSpaces r ~~ '+' case ' ' if replaceSpaces { r ~~ '+'; 1 }
case c if c <= 127 appendEncoded(c.toByte) case c if c <= 127 && asciiCompatible { appendEncoded(c.toByte); 1 }
case c c.toString.getBytes(charset).foreach(appendEncoded) case c
def append(s: String) = s.getBytes(charset).foreach(appendEncoded)
if (Character.isHighSurrogate(c)) { append(new String(Array(string codePointAt ix), 0, 1)); 2 }
else { append(c.toString); 1 }
} }
rec(ix + 1) rec(ix + charSize)
} else r } else r
} }
rec() rec(0)
} }
private[http] def isAsciiCompatible(cs: Charset) = cs == UTF8 || cs == ISO88591 || cs == ASCII
} }

View file

@ -17,6 +17,9 @@ import akka.stream.{ Transformer, FlattenStrategy, FlowMaterializer }
package object util { package object util {
private[http] val UTF8 = Charset.forName("UTF8") private[http] val UTF8 = Charset.forName("UTF8")
private[http] val ASCII = Charset.forName("ASCII")
private[http] val ISO88591 = Charset.forName("ISO-8859-1")
private[http] val EmptyByteArray = Array.empty[Byte] private[http] val EmptyByteArray = Array.empty[Byte]
private[http] def actorSystem(implicit refFactory: ActorRefFactory): ExtendedActorSystem = private[http] def actorSystem(implicit refFactory: ActorRefFactory): ExtendedActorSystem =

View file

@ -4,10 +4,13 @@
package akka.http.model package akka.http.model
import java.nio.charset.Charset
import java.net.InetAddress
import akka.http.util.StringRendering
import org.scalatest.matchers.{ MatchResult, Matcher }
import org.scalatest.{ Matchers, WordSpec } import org.scalatest.{ Matchers, WordSpec }
import akka.parboiled2.UTF8 import akka.parboiled2.UTF8
import Uri._ import Uri._
import java.net.InetAddress
class UriSpec extends WordSpec with Matchers { class UriSpec extends WordSpec with Matchers {
@ -188,20 +191,33 @@ class UriSpec extends WordSpec with Matchers {
"Uri.Path instances" should { "Uri.Path instances" should {
import Path.Empty import Path.Empty
"be parsed and rendered correctly" in { "be parsed and rendered correctly" in {
Path("") shouldEqual Empty def roundTripTo(p: Path, cs: Charset = UTF8) =
Path("/") shouldEqual Path./ Matcher[String] { s
Path("a") shouldEqual "a" :: Empty val rendering = UriRendering.renderPath(new StringRendering, p, cs).get
Path("//") shouldEqual Path./ / "" if (rendering != s) MatchResult(false, s"The path rendered to '$rendering' rather than '$s'", "<?>")
Path("a/") shouldEqual "a" :: Path./ else if (Path(s, cs) != p) MatchResult(false, s"The string parsed to '${Path(s, cs)}' rather than '$p'", "<?>")
Path("/a") shouldEqual Path / "a" else MatchResult(true, "<?>", "<?>")
Path("/abc/de/f") shouldEqual Path / "abc" / "de" / "f" }
Path("abc/de/f/") shouldEqual "abc" :: '/' :: "de" :: '/' :: "f" :: Path./
Path("abc///de") shouldEqual "abc" :: '/' :: '/' :: '/' :: "de" :: Empty "" should roundTripTo(Empty)
Path("/abc%2F") shouldEqual Path / "abc/" "/" should roundTripTo(Path./)
Path("H%C3%A4ll%C3%B6") shouldEqual """Hällö""" :: Empty "a" should roundTripTo("a" :: Empty)
Path("/%2F%5C") shouldEqual Path / """/\""" "//" should roundTripTo(Path./ / "")
Path("/:foo:/") shouldEqual Path / ":foo:" / "" "a/" should roundTripTo("a" :: Path./)
Path("%2520").head shouldEqual "%20" "/a" should roundTripTo(Path / "a")
"/abc/de/f" should roundTripTo(Path / "abc" / "de" / "f")
"abc/de/f/" should roundTripTo("abc" :: '/' :: "de" :: '/' :: "f" :: Path./)
"abc///de" should roundTripTo("abc" :: '/' :: '/' :: '/' :: "de" :: Empty)
"/abc%2F" should roundTripTo(Path / "abc/")
"/:foo:/" should roundTripTo(Path / ":foo:" / "")
"/%2520" should roundTripTo(Path / "%20")
"/foo%20bar" should roundTripTo(Path / "foo bar")
"H%C3%A4ll%C3%B6" should roundTripTo("Hällö" :: Empty)
"/%2F%5C" should roundTripTo(Path / """/\""")
"/foo%F0%9F%92%A9bar" should roundTripTo(Path / "foo\ud83d\udca9bar")
"/%C3%89g%20get%20eti%C3%B0%20gler%20%C3%A1n%20%C3%BEess%20a%C3%B0%20mei%C3%B0a%20mig" should
roundTripTo(Path / "Ég get etið gler án þess að meiða mig")
"/%00%E4%00%F6%00%FC" should roundTripTo(Path / "äöü", Charset.forName("UTF-16BE"))
} }
"support the `startsWith` predicate" in { "support the `startsWith` predicate" in {
Empty startsWith Empty shouldBe true Empty startsWith Empty shouldBe true