001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.util.encoding;
018
019import java.io.ByteArrayOutputStream;
020import java.io.UnsupportedEncodingException;
021import java.nio.charset.Charset;
022import java.nio.charset.IllegalCharsetNameException;
023import java.nio.charset.UnsupportedCharsetException;
024
025import org.apache.wicket.util.lang.Args;
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028
029/**
030 * Adapted from Spring Framework's UriUtils class, but defines instances for query string decoding versus URL path
031 * component decoding.
032 * <p/>
033 * The difference is important because a space is encoded as a + in a query string, but this is a
034 * valid value in a path component (and is therefore not decode back to a space).
035 *
036 * @author Doug Donohoe
037 * @author Thomas Heigl
038 * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a>
039 */
040public class UrlDecoder
041{
042        private static final Logger LOG = LoggerFactory.getLogger(UrlDecoder.class);
043
044        private final boolean decodePlus;
045
046        /**
047         * Encoder used to decode name or value components of a query string.<br/>
048         * <br/>
049         *
050         * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&amp;asis=thispart
051         */
052        public static final UrlDecoder QUERY_INSTANCE = new UrlDecoder(true);
053
054        /**
055         * Encoder used to decode components of a path.<br/>
056         * <br/>
057         *
058         * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart
059         */
060        public static final UrlDecoder PATH_INSTANCE = new UrlDecoder(false);
061
062        /**
063         * Create decoder
064         *
065         * @param decodePlus
066         *            - whether to decode + to space
067         */
068        private UrlDecoder(final boolean decodePlus)
069        {
070                this.decodePlus = decodePlus;
071        }
072
073        /**
074         * @param s
075         *            string to decode
076         * @param enc
077         *            encoding to decode with
078         * @return decoded string
079         */
080        public String decode(final String s, final String enc)
081        {
082                Args.notNull(enc, "enc");
083
084                try
085                {
086                        return decode(s, Charset.forName(enc));
087                }
088                catch (IllegalCharsetNameException | UnsupportedCharsetException e)
089                {
090                        throw new RuntimeException(new UnsupportedEncodingException(enc));
091                }
092        }
093
094        /**
095         * @param source
096         *            string to decode
097         * @param charset
098         *            encoding to decode with
099         * @return decoded string
100         */
101        public String decode(final String source, final Charset charset)
102        {
103                if (source == null || source.isEmpty())
104                {
105                        return source;
106                }
107
108                Args.notNull(charset, "charset");
109
110                final int length = source.length();
111                final ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
112                boolean changed = false;
113                for (int i = 0; i < length; i++)
114                {
115                        final int ch = source.charAt(i);
116                        if (ch == '%')
117                        {
118                                if (i + 2 < length)
119                                {
120                                        final char hex1 = source.charAt(i + 1);
121                                        final char hex2 = source.charAt(i + 2);
122                                        final int u = Character.digit(hex1, 16);
123                                        final int l = Character.digit(hex2, 16);
124                                        if (u != -1 && l != -1)
125                                        {
126                                                bos.write((char)((u << 4) + l));
127                                                i += 2;
128                                        }
129                                        changed = true;
130                                }
131                                else
132                                {
133                                        LOG.info(
134                                                "Incomplete trailing escape (%) pattern in '{}'. The escape character (%) will be ignored.",
135                                                source);
136                                        changed = true;
137                                }
138                        }
139                        else if (ch == '+')
140                        {
141                                if (decodePlus)
142                                {
143                                        bos.write(' ');
144                                        changed = true;
145                                }
146                                else
147                                {
148                                        bos.write(ch);
149                                }
150                        }
151                        else
152                        {
153                                bos.write(ch);
154                        }
155                }
156                final String result = changed ? new String(bos.toByteArray(), charset) : source;
157                // no trying to filter out bad escapes beforehand, just kill all null bytes here at the end,
158                // that way none will come through
159                return result.replace("\0", "NULL");
160        }
161}