001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.util.encoding; 018 019import java.io.ByteArrayOutputStream; 020import java.io.UnsupportedEncodingException; 021import java.nio.charset.Charset; 022import java.nio.charset.IllegalCharsetNameException; 023import java.nio.charset.UnsupportedCharsetException; 024 025import org.apache.wicket.util.lang.Args; 026 027/** 028 * Adapted from Spring Framework's UriUtils class, but defines instances for query string encoding versus URL path 029 * component encoding. 030 * <p/> 031 * The difference is important because a space is encoded as a + in a query string, but this is a 032 * valid value in a path component (and is therefore not decode back to a space). 033 * 034 * @author Doug Donohoe 035 * @author Thomas Heigl 036 * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a> 037 */ 038public class UrlEncoder 039{ 040 041 enum Type { 042 //@formatter:off 043 QUERY { 044 @Override 045 public boolean isAllowed(int c) 046 { 047 return isPchar(c) || 048 ' ' == c || // encoding a space to a + is done in the encode() method 049 '*' == c || 050 '/' == c || // to allow direct passing of URL in query 051 ',' == c || 052 ':' == c || // allowed and used in wicket interface 053 '@' == c ; 054 } 055 }, 056 PATH { 057 @Override 058 public boolean isAllowed(int c) 059 { 060 return isPchar(c) || 061 '*' == c || 062 '&' == c || 063 '+' == c || 064 ',' == c || 065 ';' == c || // semicolon is used in ;jsessionid= 066 '=' == c || 067 ':' == c || // allowed and used in wicket interface 068 '@' == c ; 069 070 } 071 }, 072 HEADER { 073 @Override 074 public boolean isAllowed(int c) 075 { 076 return isPchar(c) || 077 '#' == c || 078 '&' == c || 079 '+' == c || 080 '^' == c || 081 '`' == c || 082 '|' ==c; 083 } 084 }; 085 //@formatter:on 086 087 /** 088 * Indicates whether the given character is allowed in this URI component. 089 * @return {@code true} if the character is allowed; {@code false} otherwise 090 */ 091 public abstract boolean isAllowed(int c); 092 093 /** 094 * Indicates whether the given character is in the {@code ALPHA} set. 095 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 096 */ 097 protected boolean isAlpha(int c) 098 { 099 return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'); 100 } 101 102 /** 103 * Indicates whether the given character is in the {@code DIGIT} set. 104 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 105 */ 106 protected boolean isDigit(int c) 107 { 108 return (c >= '0' && c <= '9'); 109 } 110 111 /** 112 * Indicates whether the given character is in the {@code sub-delims} set. 113 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 114 */ 115 protected boolean isSubDelimiter(int c) 116 { 117 return ('!' == c || '$' == c); 118 } 119 120 /** 121 * Indicates whether the given character is in the {@code unreserved} set. 122 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 123 */ 124 protected boolean isUnreserved(int c) 125 { 126 return (isAlpha(c) || isDigit(c) || '-' == c || '.' == c || '_' == c || '~' == c); 127 } 128 129 /** 130 * Indicates whether the given character is in the {@code pchar} set. 131 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 132 */ 133 protected boolean isPchar(int c) 134 { 135 return (isUnreserved(c) || isSubDelimiter(c)); 136 } 137 } 138 139 private final Type type; 140 141 /** 142 * Encoder used to encode name or value components of a query string.<br/> 143 * <br/> 144 * 145 * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&asis=thispart 146 */ 147 public static final UrlEncoder QUERY_INSTANCE = new UrlEncoder(Type.QUERY); 148 149 /** 150 * Encoder used to encode segments of a path.<br/> 151 * <br/> 152 * 153 * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart 154 */ 155 public static final UrlEncoder PATH_INSTANCE = new UrlEncoder(Type.PATH); 156 157 /** 158 * Encoder used to encode a header. 159 */ 160 public static final UrlEncoder HEADER_INSTANCE = new UrlEncoder(Type.HEADER); 161 162 /** 163 * Allow subclass to call constructor. 164 * 165 * @param type 166 * encoder type 167 */ 168 protected UrlEncoder(final Type type) 169 { 170 this.type = type; 171 } 172 173 /** 174 * @param s 175 * string to encode 176 * @param charsetName 177 * charset to use for encoding 178 * @return encoded string 179 */ 180 public String encode(final String s, final String charsetName) 181 { 182 Args.notNull(charsetName, "charsetName"); 183 184 try 185 { 186 return encode(s, Charset.forName(charsetName)); 187 } 188 catch (IllegalCharsetNameException | UnsupportedCharsetException e) 189 { 190 throw new RuntimeException(new UnsupportedEncodingException(charsetName)); 191 } 192 } 193 194 /** 195 * @param unsafeInput 196 * string to encode 197 * @param charset 198 * encoding to use 199 * @return encoded string 200 */ 201 public String encode(final String unsafeInput, final Charset charset) 202 { 203 if (unsafeInput == null || unsafeInput.isEmpty()) 204 { 205 return unsafeInput; 206 } 207 208 Args.notNull(charset, "charset"); 209 210 final byte[] bytes = unsafeInput.getBytes(charset); 211 boolean original = true; 212 for (final byte b : bytes) 213 { 214 if (!type.isAllowed(b) || b == ' ' || b == '\0') 215 { 216 original = false; 217 break; 218 } 219 } 220 if (original) 221 { 222 return unsafeInput; 223 } 224 225 final ByteArrayOutputStream bos = new ByteArrayOutputStream(bytes.length); 226 for (final byte b : bytes) 227 { 228 if (type.isAllowed(b)) 229 { 230 if (b == ' ') 231 { 232 bos.write('+'); 233 } 234 else 235 { 236 bos.write(b); 237 } 238 } 239 else 240 { 241 if (b == '\0') 242 { 243 bos.writeBytes("NULL".getBytes(charset)); 244 } 245 else 246 { 247 bos.write('%'); 248 bos.write(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16))); 249 bos.write(Character.toUpperCase(Character.forDigit(b & 0xF, 16))); 250 } 251 } 252 } 253 return bos.toString(charset); 254 } 255 256}