/*! @license * Copyright 2008 The Closure Library Authors * SPDX-License-Identifier: Apache-2.0 */ /** * @fileoverview Simple utilities for splitting URI strings. * * Uses features of RFC 3986 for parsing/formatting URIs: * http://www.ietf.org/rfc/rfc3986.txt * * @author gboyer@google.com (Garrett Boyer) - The "lightened" design. * @author msamuel@google.com (Mike Samuel) - Domain knowledge and regexes. */ goog.provide('goog.uri.utils'); goog.provide('goog.uri.utils.ComponentIndex'); /** * A regular expression for breaking a URI into its component parts. * * {@link http://www.ietf.org/rfc/rfc3986.txt} says in Appendix B * As the "first-match-wins" algorithm is identical to the "greedy" * disambiguation method used by POSIX regular expressions, it is natural and * commonplace to use a regular expression for parsing the potential five * components of a URI reference. * * The following line is the regular expression for breaking-down a * well-formed URI reference into its components. * *
* <pre>
 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *  12            3  4          5       6  7        8 9
 * </pre>
 *
 * The numbers in the second line above are only to assist readability; they
 * indicate the reference points for each subexpression (i.e., each paired
 * parenthesis). We refer to the value matched for subexpression <n> as $<n>.
 * For example, matching the above expression to
* http://www.ics.uci.edu/pub/ietf/uri/#Related ** results in the following subexpression matches: *
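* <pre>
 *    $1 = http:
 *    $2 = http
 *    $3 = //www.ics.uci.edu
 *    $4 = www.ics.uci.edu
 *    $5 = /pub/ietf/uri/
 *    $6 = <undefined>
 *    $7 = <undefined>
 *    $8 = #Related
 *    $9 = Related
 * </pre>
 * where <undefined> indicates that the component is not present, as is the
 * case for the query component in the above example. Therefore, we can
 * determine the value of the five components as
 *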
* scheme = $2 * authority = $4 * path = $5 * query = $7 * fragment = $9 ** * The regular expression has been modified slightly to expose the * userInfo, domain, and port separately from the authority. * The modified version yields *
* $1 = http scheme * $2 =* @type {!RegExp} * @private */ goog.uri.utils.splitRe_ = new RegExp( '^' + '(?:' + '([^:/?#.]+)' + // scheme - ignore special characters // used by other URL parts such as :, // ?, /, #, and . ':)?' + '(?://' + '(?:([^/?#]*)@)?' + // userInfo '([^/#?]*?)' + // domain '(?::([0-9]+))?' + // port '(?=[/#?]|$)' + // authority-terminating character ')?' + '([^?#]+)?' + // path '(?:\\?([^#]*))?' + // query '(?:#(.*))?' + // fragment '$'); /** * The index of each URI component in the return value of goog.uri.utils.split. * @enum {number} */ goog.uri.utils.ComponentIndex = { SCHEME: 1, USER_INFO: 2, DOMAIN: 3, PORT: 4, PATH: 5, QUERY_DATA: 6, FRAGMENT: 7 }; /** * Splits a URI into its component parts. * * Each component can be accessed via the component indices; for example: *userInfo -\ * $3 = www.ics.uci.edu domain | authority * $4 = port -/ * $5 = /pub/ietf/uri/ path * $6 = query without ? * $7 = Related fragment without # *
* goog.uri.utils.split(someStr)[goog.uri.utils.ComponentIndex.QUERY_DATA]; ** * @param {string} uri The URI string to examine. * @return {!Array.