=htc Fix handling of non-BMP characters in URI encoding/decoding, closes #16014

Also improved the tests for this as they were sub-par so far
This commit is contained in:
Mathias 2014-10-01 17:16:16 +02:00
parent c272d9afa5
commit abf4091bb7
3 changed files with 62 additions and 37 deletions

View file

@ -5,17 +5,17 @@
package akka.http.model
import language.implicitConversions
import java.net.{ Inet4Address, Inet6Address, InetAddress }
import java.lang.{ StringBuilder JStringBuilder, Iterable }
import java.nio.charset.Charset
import scala.annotation.tailrec
import scala.collection.{ immutable, mutable, LinearSeqOptimized }
import scala.collection.immutable.LinearSeq
import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput, UTF8 }
import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput }
import akka.http.model.parser.UriParser
import akka.http.model.parser.CharacterClasses._
import akka.http.util._
import Uri._
import java.net.{ Inet4Address, Inet6Address, InetAddress }
/**
* An immutable model of an internet URI as defined by http://tools.ietf.org/html/rfc3986.
@ -657,13 +657,12 @@ object Uri {
decodeBytes(i + 1, oredBytes | byte)
} else oredBytes
if ((decodeBytes() >> 7) != 0) { // if non-ASCII chars are present we need to involve the charset for decoding
sb.append(new String(bytes, charset))
} else {
// if we have only ASCII chars and the charset is ASCII compatible we don't need to involve it in decoding
if (((decodeBytes() >> 7) == 0) && UriRendering.isAsciiCompatible(charset)) {
@tailrec def appendBytes(i: Int = 0): Unit =
if (i < bytesCount) { sb.append(bytes(i).toChar); appendBytes(i + 1) }
appendBytes()
}
} else sb.append(new String(bytes, charset))
decode(string, charset, lastPercentSignIndexPlus3)(sb)
case x decode(string, charset, ix + 1)(sb.append(x))
@ -750,7 +749,7 @@ object UriRendering {
def render[R <: Rendering](r: R, value: Authority): r.type = renderAuthority(r, value, "", UTF8)
}
implicit object PathRenderer extends Renderer[Path] {
def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8, encodeFirstSegmentColons = false)
def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8)
}
implicit object QueryRenderer extends Renderer[Query] {
def render[R <: Rendering](r: R, value: Query): r.type = renderQuery(r, value, UTF8)
@ -801,13 +800,14 @@ object UriRendering {
else r
}
def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean): r.type = path match {
case Path.Empty r
case Path.Slash(tail) renderPath(r ~~ '/', tail, charset, encodeFirstSegmentColons = false)
case Path.Segment(head, tail)
val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base`
renderPath(encode(r, head, charset, keep), tail, charset, encodeFirstSegmentColons = false)
}
def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean = false): r.type =
path match {
case Path.Empty r
case Path.Slash(tail) renderPath(r ~~ '/', tail, charset)
case Path.Segment(head, tail)
val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base`
renderPath(encode(r, head, charset, keep), tail, charset)
}
def renderQuery[R <: Rendering](r: R, query: Query, charset: Charset): r.type = {
def enc(s: String): Unit = encode(r, s, charset, `strict-query-char-np`, replaceSpaces = true)
@ -827,18 +827,24 @@ object UriRendering {
private[http] def encode(r: Rendering, string: String, charset: Charset, keep: CharPredicate,
replaceSpaces: Boolean = false): r.type = {
@tailrec def rec(ix: Int = 0): r.type = {
val asciiCompatible = isAsciiCompatible(charset)
@tailrec def rec(ix: Int): r.type = {
def appendEncoded(byte: Byte): Unit = r ~~ '%' ~~ CharUtils.upperHexDigit(byte >>> 4) ~~ CharUtils.upperHexDigit(byte)
if (ix < string.length) {
string.charAt(ix) match {
case c if keep(c) r ~~ c
case ' ' if replaceSpaces r ~~ '+'
case c if c <= 127 appendEncoded(c.toByte)
case c c.toString.getBytes(charset).foreach(appendEncoded)
val charSize = string.charAt(ix) match {
case c if keep(c) { r ~~ c; 1 }
case ' ' if replaceSpaces { r ~~ '+'; 1 }
case c if c <= 127 && asciiCompatible { appendEncoded(c.toByte); 1 }
case c
def append(s: String) = s.getBytes(charset).foreach(appendEncoded)
if (Character.isHighSurrogate(c)) { append(new String(Array(string codePointAt ix), 0, 1)); 2 }
else { append(c.toString); 1 }
}
rec(ix + 1)
rec(ix + charSize)
} else r
}
rec()
rec(0)
}
private[http] def isAsciiCompatible(cs: Charset) = cs == UTF8 || cs == ISO88591 || cs == ASCII
}

View file

@ -17,6 +17,9 @@ import akka.stream.{ Transformer, FlattenStrategy, FlowMaterializer }
package object util {
private[http] val UTF8 = Charset.forName("UTF8")
private[http] val ASCII = Charset.forName("ASCII")
private[http] val ISO88591 = Charset.forName("ISO-8859-1")
private[http] val EmptyByteArray = Array.empty[Byte]
private[http] def actorSystem(implicit refFactory: ActorRefFactory): ExtendedActorSystem =

View file

@ -4,10 +4,13 @@
package akka.http.model
import java.nio.charset.Charset
import java.net.InetAddress
import akka.http.util.StringRendering
import org.scalatest.matchers.{ MatchResult, Matcher }
import org.scalatest.{ Matchers, WordSpec }
import akka.parboiled2.UTF8
import Uri._
import java.net.InetAddress
class UriSpec extends WordSpec with Matchers {
@ -188,20 +191,33 @@ class UriSpec extends WordSpec with Matchers {
"Uri.Path instances" should {
import Path.Empty
"be parsed and rendered correctly" in {
Path("") shouldEqual Empty
Path("/") shouldEqual Path./
Path("a") shouldEqual "a" :: Empty
Path("//") shouldEqual Path./ / ""
Path("a/") shouldEqual "a" :: Path./
Path("/a") shouldEqual Path / "a"
Path("/abc/de/f") shouldEqual Path / "abc" / "de" / "f"
Path("abc/de/f/") shouldEqual "abc" :: '/' :: "de" :: '/' :: "f" :: Path./
Path("abc///de") shouldEqual "abc" :: '/' :: '/' :: '/' :: "de" :: Empty
Path("/abc%2F") shouldEqual Path / "abc/"
Path("H%C3%A4ll%C3%B6") shouldEqual """Hällö""" :: Empty
Path("/%2F%5C") shouldEqual Path / """/\"""
Path("/:foo:/") shouldEqual Path / ":foo:" / ""
Path("%2520").head shouldEqual "%20"
def roundTripTo(p: Path, cs: Charset = UTF8) =
Matcher[String] { s
val rendering = UriRendering.renderPath(new StringRendering, p, cs).get
if (rendering != s) MatchResult(false, s"The path rendered to '$rendering' rather than '$s'", "<?>")
else if (Path(s, cs) != p) MatchResult(false, s"The string parsed to '${Path(s, cs)}' rather than '$p'", "<?>")
else MatchResult(true, "<?>", "<?>")
}
"" should roundTripTo(Empty)
"/" should roundTripTo(Path./)
"a" should roundTripTo("a" :: Empty)
"//" should roundTripTo(Path./ / "")
"a/" should roundTripTo("a" :: Path./)
"/a" should roundTripTo(Path / "a")
"/abc/de/f" should roundTripTo(Path / "abc" / "de" / "f")
"abc/de/f/" should roundTripTo("abc" :: '/' :: "de" :: '/' :: "f" :: Path./)
"abc///de" should roundTripTo("abc" :: '/' :: '/' :: '/' :: "de" :: Empty)
"/abc%2F" should roundTripTo(Path / "abc/")
"/:foo:/" should roundTripTo(Path / ":foo:" / "")
"/%2520" should roundTripTo(Path / "%20")
"/foo%20bar" should roundTripTo(Path / "foo bar")
"H%C3%A4ll%C3%B6" should roundTripTo("Hällö" :: Empty)
"/%2F%5C" should roundTripTo(Path / """/\""")
"/foo%F0%9F%92%A9bar" should roundTripTo(Path / "foo\ud83d\udca9bar")
"/%C3%89g%20get%20eti%C3%B0%20gler%20%C3%A1n%20%C3%BEess%20a%C3%B0%20mei%C3%B0a%20mig" should
roundTripTo(Path / "Ég get etið gler án þess að meiða mig")
"/%00%E4%00%F6%00%FC" should roundTripTo(Path / "äöü", Charset.forName("UTF-16BE"))
}
"support the `startsWith` predicate" in {
Empty startsWith Empty shouldBe true