/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.wicket.util.encoding;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;

import org.apache.wicket.util.lang.Args;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Adapted from Spring Framework's UriUtils class, but defines instances for query string decoding
 * versus URL path component decoding.
 * <p/>
 * The difference is important because a space is encoded as a + in a query string, but this is a
 * valid value in a path component (and is therefore not decoded back to a space).
 * <p/>
 * Decoding is lenient: malformed or incomplete escape sequences never cause an exception, the
 * offending escape character (%) is simply dropped. Null characters in the result are replaced
 * by the literal text <code>NULL</code>.
 *
 * @author Doug Donohoe
 * @author Thomas Heigl
 * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a>
 */
public class UrlDecoder
{
	private static final Logger LOG = LoggerFactory.getLogger(UrlDecoder.class);

	/** Whether a '+' in the input is decoded to a space. */
	private final boolean decodePlus;

	/**
	 * Decoder used to decode name or value components of a query string.<br/>
	 * <br/>
	 *
	 * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&asis=thispart
	 */
	public static final UrlDecoder QUERY_INSTANCE = new UrlDecoder(true);

	/**
	 * Decoder used to decode components of a path.<br/>
	 * <br/>
	 *
	 * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart
	 */
	public static final UrlDecoder PATH_INSTANCE = new UrlDecoder(false);

	/**
	 * Create decoder
	 *
	 * @param decodePlus
	 *            - whether to decode + to space
	 */
	private UrlDecoder(final boolean decodePlus)
	{
		this.decodePlus = decodePlus;
	}

	/**
	 * Decodes the given string, looking up the charset by name first.
	 *
	 * @param s
	 *            string to decode
	 * @param enc
	 *            name of the encoding to decode with; checked with {@code Args.notNull}
	 * @return decoded string
	 * @throws RuntimeException
	 *             wrapping an {@link UnsupportedEncodingException} if {@code enc} is not a legal
	 *             or supported charset name
	 */
	public String decode(final String s, final String enc)
	{
		Args.notNull(enc, "enc");

		try
		{
			return decode(s, Charset.forName(enc));
		}
		catch (IllegalCharsetNameException | UnsupportedCharsetException e)
		{
			// UnsupportedEncodingException has no cause-taking constructor, so attach the
			// original failure explicitly instead of losing it.
			final UnsupportedEncodingException unsupported = new UnsupportedEncodingException(enc);
			unsupported.initCause(e);
			throw new RuntimeException(unsupported);
		}
	}

	/**
	 * Decodes percent-escapes (and, for the query decoder, '+') in the given string.
	 * <p/>
	 * Escaped bytes are collected and interpreted using {@code charset}. An escape that is not
	 * followed by two hexadecimal digits is not decoded: its '%' is dropped and the characters
	 * after it are kept as they are. Literal non-ASCII characters in the input are preserved
	 * (as far as {@code charset} is able to represent them). Finally every null character in the
	 * result is replaced by the text <code>NULL</code>.
	 *
	 * @param source
	 *            string to decode
	 * @param charset
	 *            encoding to decode with; checked with {@code Args.notNull} unless
	 *            {@code source} is null or empty
	 * @return decoded string; {@code source} itself if it is null or empty
	 */
	public String decode(final String source, final Charset charset)
	{
		if (source == null || source.isEmpty())
		{
			return source;
		}

		Args.notNull(charset, "charset");

		final int length = source.length();
		final ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
		boolean changed = false;
		for (int i = 0; i < length; i++)
		{
			final char ch = source.charAt(i);
			if (ch == '%')
			{
				// The '%' itself is never copied to the output, so the result always differs
				// from the input once an escape character has been seen.
				changed = true;
				if (i + 2 < length)
				{
					final int u = Character.digit(source.charAt(i + 1), 16);
					final int l = Character.digit(source.charAt(i + 2), 16);
					if (u != -1 && l != -1)
					{
						bos.write((u << 4) + l);
						i += 2;
					}
					// Otherwise only the '%' is skipped; the two following characters are
					// handled by the next iterations like any other input.
				}
				else
				{
					LOG.info(
						"Incomplete trailing escape (%) pattern in '{}'. The escape character (%) will be ignored.",
						source);
				}
			}
			else if (ch == '+' && decodePlus)
			{
				bos.write(' ');
				changed = true;
			}
			else if (ch < 0x80)
			{
				bos.write(ch);
			}
			else
			{
				// A literal non-ASCII character does not fit into a single byte. Writing only
				// its low 8 bits would corrupt it when the buffer is decoded again below, so
				// encode the whole non-ASCII run (this keeps surrogate pairs together) with
				// the target charset.
				int end = i + 1;
				while (end < length && source.charAt(end) >= 0x80)
				{
					end++;
				}
				final byte[] encoded = source.substring(i, end).getBytes(charset);
				bos.write(encoded, 0, encoded.length);
				i = end - 1;
			}
		}
		final String result = changed ? new String(bos.toByteArray(), charset) : source;
		// no trying to filter out bad escapes beforehand, just kill all null bytes here at the end,
		// that way none will come through
		return result.replace("\0", "NULL");
	}
}