001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.runtimecatalog;
018
019import java.util.ArrayList;
020import java.util.BitSet;
021import java.util.List;
022
023import org.apache.camel.runtimecatalog.Pair;
024
025/**
026 * Encoder for unsafe URI characters.
027 * <p/>
028 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
029 */
030public final class UnsafeUriCharactersEncoder {
031    private static BitSet unsafeCharactersRfc1738;
032    private static BitSet unsafeCharactersHttp;
033    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
034        'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'};
035
036    static {
037        unsafeCharactersRfc1738 = new BitSet(256);
038        unsafeCharactersRfc1738.set(' ');
039        unsafeCharactersRfc1738.set('"');
040        unsafeCharactersRfc1738.set('<');
041        unsafeCharactersRfc1738.set('>');
042        unsafeCharactersRfc1738.set('#');
043        unsafeCharactersRfc1738.set('%');
044        unsafeCharactersRfc1738.set('{');
045        unsafeCharactersRfc1738.set('}');
046        unsafeCharactersRfc1738.set('|');
047        unsafeCharactersRfc1738.set('\\');
048        unsafeCharactersRfc1738.set('^');
049        unsafeCharactersRfc1738.set('~');
050        unsafeCharactersRfc1738.set('[');
051        unsafeCharactersRfc1738.set(']');
052        unsafeCharactersRfc1738.set('`');
053    }
054
055    static {
056        unsafeCharactersHttp = new BitSet(256);
057        unsafeCharactersHttp.set(' ');
058        unsafeCharactersHttp.set('"');
059        unsafeCharactersHttp.set('<');
060        unsafeCharactersHttp.set('>');
061        unsafeCharactersHttp.set('#');
062        unsafeCharactersHttp.set('%');
063        unsafeCharactersHttp.set('{');
064        unsafeCharactersHttp.set('}');
065        unsafeCharactersHttp.set('|');
066        unsafeCharactersHttp.set('\\');
067        unsafeCharactersHttp.set('^');
068        unsafeCharactersHttp.set('~');
069        unsafeCharactersHttp.set('`');
070    }
071
072    private UnsafeUriCharactersEncoder() {
073        // util class
074    }
075
076    public static String encode(String s) {
077        return encode(s, unsafeCharactersRfc1738);
078    }
079
080    public static String encodeHttpURI(String s) {
081        return encode(s, unsafeCharactersHttp);
082    }
083
084    public static String encode(String s, BitSet unsafeCharacters) {
085        return encode(s, unsafeCharacters, false);
086    }
087
088    public static String encode(String s, boolean checkRaw) {
089        return encode(s, unsafeCharactersRfc1738, checkRaw);
090    }
091
092    public static String encodeHttpURI(String s, boolean checkRaw) {
093        return encode(s, unsafeCharactersHttp, checkRaw);
094    }
095
096    // Just skip the encode for isRAW part
097    public static String encode(String s, BitSet unsafeCharacters, boolean checkRaw) {
098        List<Pair<Integer>> rawPairs;
099        if (checkRaw) {
100            rawPairs = URISupport.scanRaw(s);
101        } else {
102            rawPairs = new ArrayList<>();
103        }
104
105        int n = s == null ? 0 : s.length();
106        if (n == 0) {
107            return s;
108        }
109
110        // First check whether we actually need to encode
111        char chars[] = s.toCharArray();
112        for (int i = 0;;) {
113            // just deal with the ascii character
114            if (chars[i] > 0 && chars[i] < 128) {
115                if (unsafeCharacters.get(chars[i])) {
116                    break;
117                }
118            }
119            if (++i >= chars.length) {
120                return s;
121            }
122        }
123
124        // okay there are some unsafe characters so we do need to encode
125        // see details at: http://en.wikipedia.org/wiki/Url_encode
126        StringBuilder sb = new StringBuilder();
127        for (int i = 0; i < chars.length; i++) {
128            char ch = chars[i];
129            if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) {
130                // special for % sign as it may be a decimal encoded value
131                if (ch == '%') {
132                    char next = i + 1 < chars.length ? chars[i + 1] : ' ';
133                    char next2 = i + 2 < chars.length ? chars[i + 2] : ' ';
134
135                    if (isHexDigit(next) && isHexDigit(next2) && !URISupport.isRaw(i, rawPairs)) {
136                        // its already encoded (decimal encoded) so just append as is
137                        sb.append(ch);
138                    } else {
139                        // must escape then, as its an unsafe character
140                        appendEscape(sb, (byte) ch);
141                    }
142                } else {
143                    // must escape then, as its an unsafe character
144                    appendEscape(sb, (byte) ch);
145                }
146            } else {
147                sb.append(ch);
148            }
149        }
150        return sb.toString();
151    }
152
153    private static void appendEscape(StringBuilder sb, byte b) {
154        sb.append('%');
155        sb.append(HEX_DIGITS[(b >> 4) & 0x0f]);
156        sb.append(HEX_DIGITS[(b >> 0) & 0x0f]);
157    }
158
159    private static boolean isHexDigit(char ch) {
160        for (char hex : HEX_DIGITS) {
161            if (hex == ch) {
162                return true;
163            }
164        }
165        return false;
166    }
167
168}