1. Overview
1.What is Base64:
Base64 is one of the most common encodings for transmitting 8-bit byte data on the Internet; see RFC 2045 through RFC 2049, which contain the detailed MIME specifications. Base64 encoding can be used to pass longer identification information in an HTTP environment. For example, in the Java persistence framework Hibernate, Base64 is used to encode a long unique identifier (usually a 128-bit UUID) into a string, which is then used as a parameter in HTTP forms and HTTP GET URLs. In other applications, it is often necessary to encode binary data into a form suitable for placement in a URL (including hidden form fields). In such cases, Base64 encoding is not only relatively short but also not human-readable, that is, the encoded data cannot be read directly with the naked eye.
2. Introduction:
Standard Base64 is not suitable for direct transmission in a URL, because the URL encoder changes the "/" and "+" characters of standard Base64 into the "%XX" form, and these "%" characters require further conversion when they are stored in a database, because the "%" sign is used as a wildcard character in ANSI SQL.
To solve this problem, an improved Base64 encoding for URLs can be used, which does not pad with the '=' sign at the end and changes the "+" and "/" of standard Base64 to "*" and "-" respectively. This eliminates the need for conversion during URL encoding, decoding and database storage, avoids the increase in the length of the encoded information in the process, and unifies the format of object identifiers in databases, forms, etc.
There is also an improved Base64 variant for regular expressions, which changes "+" and "/" to "!" and "-", because "+", "*", "[" and "]" may all have special meanings in regular expressions.
There are also variants that change "+/" to "_-" or "._" (used as identifier names in programming languages), ".-" (used for Nmtokens in XML) or even "_:" (used for Names in XML).
Base64 converts every three 8-bit bytes into four 6-bit groups (3*8 = 4*6 = 24) and then prepends two high-order 0 bits to each 6-bit group to form four 8-bit bytes. In other words, the converted string will theoretically be 1/3 longer than the original one.
3. Rules:
Rules about this encoding:
①. Convert every 3 characters into 4 characters.
②. Add a newline character every 76 characters.
③. The final terminator must also be processed.
Isn’t that too abstract? Don’t worry, let’s look at an example:
Before conversion: aaaaaabb ccccdddd eeffffff
After conversion: 00aaaaaa 00bbcccc 00ddddee 00ffffff
It should be clear, right? The upper three bytes are the original text, and the lower four bytes are the converted Base64 encoding, with the first two bits being 0.
After conversion, we use a code table to get the string we want (that is, the final Base64 encoding)
2. Java implementation code example:
/**
 * MIME-style Base64 encoder/decoder using the standard alphabet
 * ({@code A-Z}, {@code a-z}, {@code 0-9}, {@code +}, {@code /}) and
 * {@code =} padding.
 *
 * <p>{@link #encode(byte[])} inserts a line feed after every 76 output
 * characters and always terminates non-empty output with a line feed.
 * {@link #decode(String)} ignores white space and returns {@code null} for
 * malformed input instead of throwing.
 */
public final class Base64 {

    /** Size of the decoding table; chars at or above this value are never Base64 data. */
    static private final int BASELENGTH = 255;
    /** Number of symbols in the Base64 alphabet. */
    static private final int LOOKUPLENGTH = 64;
    /** Number of encoded characters that represent one group of three bytes. */
    static private final int FOURBYTE = 4;
    /** Number of 4-character groups per output line (19 * 4 = 76 characters). */
    static private final int QUARTETS_PER_LINE = 19;
    /** Padding character used to complete the final 4-character group. */
    static private final char PAD = '=';

    /** Maps a character code to its 6-bit value, or -1 when it is not a Base64 symbol. */
    static final private byte[] base64Alphabet = new byte[BASELENGTH];
    /** Maps a 6-bit value to its Base64 symbol. */
    static final private char[] lookUpBase64Alphabet = new char[LOOKUPLENGTH];

    static {
        for (int i = 0; i < BASELENGTH; i++) {
            base64Alphabet[i] = -1;
        }
        for (int i = 'Z'; i >= 'A'; i--) {
            base64Alphabet[i] = (byte) (i - 'A');
        }
        for (int i = 'z'; i >= 'a'; i--) {
            base64Alphabet[i] = (byte) (i - 'a' + 26);
        }
        for (int i = '9'; i >= '0'; i--) {
            base64Alphabet[i] = (byte) (i - '0' + 52);
        }
        base64Alphabet['+'] = 62;
        base64Alphabet['/'] = 63;

        for (int i = 0; i <= 25; i++) {
            lookUpBase64Alphabet[i] = (char) ('A' + i);
        }
        for (int i = 26, j = 0; i <= 51; i++, j++) {
            lookUpBase64Alphabet[i] = (char) ('a' + j);
        }
        for (int i = 52, j = 0; i <= 61; i++, j++) {
            lookUpBase64Alphabet[i] = (char) ('0' + j);
        }
        lookUpBase64Alphabet[62] = '+';
        lookUpBase64Alphabet[63] = '/';
    }

    /**
     * Tells whether the character is a space, carriage return, line feed or tab.
     *
     * @param octect the character to test
     * @return {@code true} if the character is one of those four white-space characters
     */
    protected static boolean isWhiteSpace(char octect) {
        return (octect == 0x20 || octect == 0xd || octect == 0xa || octect == 0x9);
    }

    /**
     * Tells whether the character is the padding character {@code '='}.
     *
     * @param octect the character to test
     * @return {@code true} if the character is the pad character
     */
    protected static boolean isPad(char octect) {
        return (octect == PAD);
    }

    /**
     * Tells whether the character is a symbol of the Base64 alphabet.
     *
     * @param octect the character to test
     * @return {@code true} if the character maps to a 6-bit value
     */
    protected static boolean isData(char octect) {
        // The range check must come first: characters outside the table
        // (e.g. any non-Latin-1 char) would otherwise index out of bounds.
        return (octect < BASELENGTH && base64Alphabet[octect] != -1);
    }

    /**
     * Tells whether the character may appear in Base64 text: white space,
     * padding, or an alphabet symbol.
     *
     * @param octect the character to test
     * @return {@code true} if the character is white space, pad or data
     */
    protected static boolean isBase64(char octect) {
        return (isWhiteSpace(octect) || isPad(octect) || isData(octect));
    }

    /**
     * Encodes bytes into Base64 text.
     *
     * <p>A line feed is written after every 76 encoded characters, and the
     * result of encoding a non-empty array always ends with a line feed.
     *
     * @param binaryData the bytes to encode
     * @return the Base64 text, {@code ""} for an empty array, or
     *         {@code null} if {@code binaryData} is {@code null}
     */
    public static String encode(byte[] binaryData) {
        if (binaryData == null) {
            return null;
        }

        // Work in bytes rather than bits so that large arrays cannot
        // overflow the int arithmetic used to size the output.
        int length = binaryData.length;
        if (length == 0) {
            return "";
        }

        int numberTriplets = length / 3;
        int remainingBytes = length % 3;
        int numberQuartet = remainingBytes != 0 ? numberTriplets + 1 : numberTriplets;
        int numberLines = (numberQuartet - 1) / QUARTETS_PER_LINE + 1;
        // One extra char per line for its terminating line feed.
        char[] encodedData = new char[numberQuartet * 4 + numberLines];

        int encodedIndex = 0;
        int dataIndex = 0;

        for (int quartet = 1; quartet <= numberTriplets; quartet++) {
            // Mask to 0..255 so the shifts below never see sign extension.
            int b1 = binaryData[dataIndex++] & 0xff;
            int b2 = binaryData[dataIndex++] & 0xff;
            int b3 = binaryData[dataIndex++] & 0xff;

            encodedData[encodedIndex++] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[((b2 & 0x0f) << 2) | (b3 >> 6)];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[b3 & 0x3f];

            // Break the line after each full line that is followed by more
            // output; the final line feed is appended after the loop.
            if (quartet % QUARTETS_PER_LINE == 0 && quartet < numberQuartet) {
                encodedData[encodedIndex++] = '\n';
            }
        }

        // Form an integral number of 6-bit groups from the trailing 1 or 2 bytes.
        if (remainingBytes == 1) {
            int b1 = binaryData[dataIndex] & 0xff;
            encodedData[encodedIndex++] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[(b1 & 0x03) << 4];
            encodedData[encodedIndex++] = PAD;
            encodedData[encodedIndex++] = PAD;
        } else if (remainingBytes == 2) {
            int b1 = binaryData[dataIndex] & 0xff;
            int b2 = binaryData[dataIndex + 1] & 0xff;
            encodedData[encodedIndex++] = lookUpBase64Alphabet[b1 >> 2];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)];
            encodedData[encodedIndex++] = lookUpBase64Alphabet[(b2 & 0x0f) << 2];
            encodedData[encodedIndex++] = PAD;
        }

        encodedData[encodedIndex] = '\n';
        return new String(encodedData);
    }

    /**
     * Decodes Base64 text into bytes.
     *
     * <p>White space (space, tab, CR, LF) is removed before decoding.
     *
     * @param encoded the Base64 text to decode
     * @return the decoded bytes; an empty array if the input contains no
     *         non-white-space characters; or {@code null} if
     *         {@code encoded} is {@code null}, its length without white
     *         space is not a multiple of four, it contains a character
     *         outside the Base64 alphabet, or its padding is malformed
     */
    public static byte[] decode(String encoded) {
        if (encoded == null) {
            return null;
        }

        char[] base64Data = encoded.toCharArray();
        // remove white spaces
        int len = removeWhiteSpace(base64Data);

        if (len % FOURBYTE != 0) {
            return null; // should be divisible by four
        }

        int numberQuadruple = len / FOURBYTE;
        if (numberQuadruple == 0) {
            return new byte[0];
        }

        byte[] decodedData = new byte[numberQuadruple * 3];
        int encodedIndex = 0;
        int dataIndex = 0;
        int i = 0;

        // Every quadruple except the last must consist of four data characters.
        for (; i < numberQuadruple - 1; i++) {
            char d1 = base64Data[dataIndex++];
            char d2 = base64Data[dataIndex++];
            char d3 = base64Data[dataIndex++];
            char d4 = base64Data[dataIndex++];
            if (!isData(d1) || !isData(d2) || !isData(d3) || !isData(d4)) {
                return null; // if found "no data" just return null
            }

            byte b1 = base64Alphabet[d1];
            byte b2 = base64Alphabet[d2];
            byte b3 = base64Alphabet[d3];
            byte b4 = base64Alphabet[d4];

            decodedData[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4);
            decodedData[encodedIndex++] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
            decodedData[encodedIndex++] = (byte) (b3 << 6 | b4);
        }

        // The last quadruple may end with one or two PAD characters.
        char d1 = base64Data[dataIndex++];
        char d2 = base64Data[dataIndex++];
        char d3 = base64Data[dataIndex++];
        char d4 = base64Data[dataIndex++];
        if (!isData(d1) || !isData(d2)) {
            return null; // if found "no data" just return null
        }

        byte b1 = base64Alphabet[d1];
        byte b2 = base64Alphabet[d2];

        if (isData(d3) && isData(d4)) {
            // No PAD e.g. 3cQl
            byte b3 = base64Alphabet[d3];
            byte b4 = base64Alphabet[d4];
            decodedData[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4);
            decodedData[encodedIndex++] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
            decodedData[encodedIndex++] = (byte) (b3 << 6 | b4);
            return decodedData;
        }

        if (isPad(d3) && isPad(d4)) {
            // Two PAD e.g. 3c[Pad][Pad]
            if ((b2 & 0xf) != 0) {
                return null; // last 4 bits should be zero
            }
            byte[] tmp = new byte[i * 3 + 1];
            System.arraycopy(decodedData, 0, tmp, 0, i * 3);
            tmp[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
            return tmp;
        }

        if (isData(d3) && isPad(d4)) {
            // One PAD e.g. 3cQ[Pad]
            byte b3 = base64Alphabet[d3];
            if ((b3 & 0x3) != 0) {
                return null; // last 2 bits should be zero
            }
            byte[] tmp = new byte[i * 3 + 2];
            System.arraycopy(decodedData, 0, tmp, 0, i * 3);
            tmp[encodedIndex++] = (byte) (b1 << 2 | b2 >> 4);
            tmp[encodedIndex] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
            return tmp;
        }

        // an error like "3c[Pad]r", "3cdX", "3cXd", "3cXX" where X is non data
        return null;
    }

    /**
     * Removes white space from Base64 text in place by compacting the
     * remaining characters to the front of the array.
     *
     * @param data the char array of Base64 data (with white space); characters
     *             past the returned length are left as they were
     * @return the number of non-white-space characters, or 0 if
     *         {@code data} is {@code null}
     */
    protected static int removeWhiteSpace(char[] data) {
        if (data == null) {
            return 0;
        }

        // Copy each non-white-space character down over the gaps.
        int newSize = 0;
        int len = data.length;
        for (int i = 0; i < len; i++) {
            if (!isWhiteSpace(data[i])) {
                data[newSize++] = data[i];
            }
        }
        return newSize;
    }

    /**
     * Prints the Base64 encoding of a sample string to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println(encode("People's Republic of China".getBytes()));
    }
}