From abf4091bb74ff61304859f99197dece96b64ee79 Mon Sep 17 00:00:00 2001 From: Mathias Date: Wed, 1 Oct 2014 17:16:16 +0200 Subject: [PATCH] =htc Fix handling of non-BMP characters in URI encoding/decoding, closes #16014 Also improved the tests for this as they were sub-par so far --- .../src/main/scala/akka/http/model/Uri.scala | 50 +++++++++++-------- .../main/scala/akka/http/util/package.scala | 3 ++ .../test/scala/akka/http/model/UriSpec.scala | 46 +++++++++++------ 3 files changed, 62 insertions(+), 37 deletions(-) diff --git a/akka-http-core/src/main/scala/akka/http/model/Uri.scala b/akka-http-core/src/main/scala/akka/http/model/Uri.scala index 1e411eae3a..db7d1ee2ad 100644 --- a/akka-http-core/src/main/scala/akka/http/model/Uri.scala +++ b/akka-http-core/src/main/scala/akka/http/model/Uri.scala @@ -5,17 +5,17 @@ package akka.http.model import language.implicitConversions +import java.net.{ Inet4Address, Inet6Address, InetAddress } import java.lang.{ StringBuilder ⇒ JStringBuilder, Iterable } import java.nio.charset.Charset import scala.annotation.tailrec import scala.collection.{ immutable, mutable, LinearSeqOptimized } import scala.collection.immutable.LinearSeq -import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput, UTF8 } +import akka.parboiled2.{ CharUtils, CharPredicate, ParserInput } import akka.http.model.parser.UriParser import akka.http.model.parser.CharacterClasses._ import akka.http.util._ import Uri._ -import java.net.{ Inet4Address, Inet6Address, InetAddress } /** * An immutable model of an internet URI as defined by http://tools.ietf.org/html/rfc3986. @@ -657,13 +657,12 @@ object Uri { decodeBytes(i + 1, oredBytes | byte) } else oredBytes - if ((decodeBytes() >> 7) != 0) { // if non-ASCII chars are present we need to involve the charset for decoding - sb.append(new String(bytes, charset)) - } else { + // if we have only ASCII chars and the charset is ASCII compatible we don't need to involve it in decoding + if (((decodeBytes() >> 7) == 0) && UriRendering.isAsciiCompatible(charset)) { @tailrec def appendBytes(i: Int = 0): Unit = if (i < bytesCount) { sb.append(bytes(i).toChar); appendBytes(i + 1) } appendBytes() - } + } else sb.append(new String(bytes, charset)) decode(string, charset, lastPercentSignIndexPlus3)(sb) case x ⇒ decode(string, charset, ix + 1)(sb.append(x)) @@ -750,7 +749,7 @@ object UriRendering { def render[R <: Rendering](r: R, value: Authority): r.type = renderAuthority(r, value, "", UTF8) } implicit object PathRenderer extends Renderer[Path] { - def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8, encodeFirstSegmentColons = false) + def render[R <: Rendering](r: R, value: Path): r.type = renderPath(r, value, UTF8) } implicit object QueryRenderer extends Renderer[Query] { def render[R <: Rendering](r: R, value: Query): r.type = renderQuery(r, value, UTF8) @@ -801,13 +800,14 @@ object UriRendering { else r } - def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean): r.type = path match { - case Path.Empty ⇒ r - case Path.Slash(tail) ⇒ renderPath(r ~~ '/', tail, charset, encodeFirstSegmentColons = false) - case Path.Segment(head, tail) ⇒ - val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base` - renderPath(encode(r, head, charset, keep), tail, charset, encodeFirstSegmentColons = false) - } + def renderPath[R <: Rendering](r: R, path: Path, charset: Charset, encodeFirstSegmentColons: Boolean = false): r.type = + path match { + case Path.Empty ⇒ r + case Path.Slash(tail) ⇒ renderPath(r ~~ '/', tail, charset) + case Path.Segment(head, tail) ⇒ + val keep = if (encodeFirstSegmentColons) `pchar-base-nc` else `pchar-base` + renderPath(encode(r, head, charset, keep), tail, charset) + } def renderQuery[R <: Rendering](r: R, query: Query, charset: Charset): r.type = { def enc(s: String): Unit = encode(r, s, charset, `strict-query-char-np`, replaceSpaces = true) @@ -827,18 +827,24 @@ object UriRendering { private[http] def encode(r: Rendering, string: String, charset: Charset, keep: CharPredicate, replaceSpaces: Boolean = false): r.type = { - @tailrec def rec(ix: Int = 0): r.type = { + val asciiCompatible = isAsciiCompatible(charset) + @tailrec def rec(ix: Int): r.type = { def appendEncoded(byte: Byte): Unit = r ~~ '%' ~~ CharUtils.upperHexDigit(byte >>> 4) ~~ CharUtils.upperHexDigit(byte) if (ix < string.length) { - string.charAt(ix) match { - case c if keep(c) ⇒ r ~~ c - case ' ' if replaceSpaces ⇒ r ~~ '+' - case c if c <= 127 ⇒ appendEncoded(c.toByte) - case c ⇒ c.toString.getBytes(charset).foreach(appendEncoded) + val charSize = string.charAt(ix) match { + case c if keep(c) ⇒ { r ~~ c; 1 } + case ' ' if replaceSpaces ⇒ { r ~~ '+'; 1 } + case c if c <= 127 && asciiCompatible ⇒ { appendEncoded(c.toByte); 1 } + case c ⇒ + def append(s: String) = s.getBytes(charset).foreach(appendEncoded) + if (Character.isHighSurrogate(c)) { append(new String(Array(string codePointAt ix), 0, 1)); 2 } + else { append(c.toString); 1 } } - rec(ix + 1) + rec(ix + charSize) } else r } - rec() + rec(0) } + + private[http] def isAsciiCompatible(cs: Charset) = cs == UTF8 || cs == ISO88591 || cs == ASCII } diff --git a/akka-http-core/src/main/scala/akka/http/util/package.scala b/akka-http-core/src/main/scala/akka/http/util/package.scala index 733b22e192..a63854623f 100644 --- a/akka-http-core/src/main/scala/akka/http/util/package.scala +++ b/akka-http-core/src/main/scala/akka/http/util/package.scala @@ -17,6 +17,9 @@ import akka.stream.{ Transformer, FlattenStrategy, FlowMaterializer } package object util { private[http] val UTF8 = Charset.forName("UTF8") + private[http] val ASCII = Charset.forName("ASCII") + private[http] val ISO88591 = Charset.forName("ISO-8859-1") + private[http] val EmptyByteArray = Array.empty[Byte] private[http] def actorSystem(implicit refFactory: ActorRefFactory): ExtendedActorSystem = diff --git a/akka-http-core/src/test/scala/akka/http/model/UriSpec.scala b/akka-http-core/src/test/scala/akka/http/model/UriSpec.scala index d9cb8e7035..d37b02533e 100644 --- a/akka-http-core/src/test/scala/akka/http/model/UriSpec.scala +++ b/akka-http-core/src/test/scala/akka/http/model/UriSpec.scala @@ -4,10 +4,13 @@ package akka.http.model +import java.nio.charset.Charset +import java.net.InetAddress +import akka.http.util.StringRendering +import org.scalatest.matchers.{ MatchResult, Matcher } import org.scalatest.{ Matchers, WordSpec } import akka.parboiled2.UTF8 import Uri._ -import java.net.InetAddress class UriSpec extends WordSpec with Matchers { @@ -188,20 +191,33 @@ class UriSpec extends WordSpec with Matchers { "Uri.Path instances" should { import Path.Empty "be parsed and rendered correctly" in { - Path("") shouldEqual Empty - Path("/") shouldEqual Path./ - Path("a") shouldEqual "a" :: Empty - Path("//") shouldEqual Path./ / "" - Path("a/") shouldEqual "a" :: Path./ - Path("/a") shouldEqual Path / "a" - Path("/abc/de/f") shouldEqual Path / "abc" / "de" / "f" - Path("abc/de/f/") shouldEqual "abc" :: '/' :: "de" :: '/' :: "f" :: Path./ - Path("abc///de") shouldEqual "abc" :: '/' :: '/' :: '/' :: "de" :: Empty - Path("/abc%2F") shouldEqual Path / "abc/" - Path("H%C3%A4ll%C3%B6") shouldEqual """Hällö""" :: Empty - Path("/%2F%5C") shouldEqual Path / """/\""" - Path("/:foo:/") shouldEqual Path / ":foo:" / "" - Path("%2520").head shouldEqual "%20" + def roundTripTo(p: Path, cs: Charset = UTF8) = + Matcher[String] { s ⇒ + val rendering = UriRendering.renderPath(new StringRendering, p, cs).get + if (rendering != s) MatchResult(false, s"The path rendered to '$rendering' rather than '$s'", "") + else if (Path(s, cs) != p) MatchResult(false, s"The string parsed to '${Path(s, cs)}' rather than '$p'", "") + else MatchResult(true, "", "") + } + + "" should roundTripTo(Empty) + "/" should roundTripTo(Path./) + "a" should roundTripTo("a" :: Empty) + "//" should roundTripTo(Path./ / "") + "a/" should roundTripTo("a" :: Path./) + "/a" should roundTripTo(Path / "a") + "/abc/de/f" should roundTripTo(Path / "abc" / "de" / "f") + "abc/de/f/" should roundTripTo("abc" :: '/' :: "de" :: '/' :: "f" :: Path./) + "abc///de" should roundTripTo("abc" :: '/' :: '/' :: '/' :: "de" :: Empty) + "/abc%2F" should roundTripTo(Path / "abc/") + "/:foo:/" should roundTripTo(Path / ":foo:" / "") + "/%2520" should roundTripTo(Path / "%20") + "/foo%20bar" should roundTripTo(Path / "foo bar") + "H%C3%A4ll%C3%B6" should roundTripTo("Hällö" :: Empty) + "/%2F%5C" should roundTripTo(Path / """/\""") + "/foo%F0%9F%92%A9bar" should roundTripTo(Path / "foo\ud83d\udca9bar") + "/%C3%89g%20get%20eti%C3%B0%20gler%20%C3%A1n%20%C3%BEess%20a%C3%B0%20mei%C3%B0a%20mig" should + roundTripTo(Path / "Ég get etið gler án þess að meiða mig") + "/%00%E4%00%F6%00%FC" should roundTripTo(Path / "äöü", Charset.forName("UTF-16BE")) } "support the `startsWith` predicate" in { Empty startsWith Empty shouldBe true