Skip to content

wayne931121/Python_URL_Decode

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

16 Commits
 
 
 
 

Repository files navigation

Python_URL_Decode

Python URL Decoder

The functions url_encoder and url_decoder are defined in url_decode.py. Example usage:

from url_decode import url_encoder, url_decoder

# Percent-encoded form of "https://zh.wikipedia.org/zh-tw/百分号编码";
# each non-ASCII character appears as the %XX escapes of its UTF-8 bytes.
url = "https://zh.wikipedia.org/zh-tw/%E7%99%BE%E5%88%86%E5%8F%B7%E7%BC%96%E7%A0%81"
url = url_decoder(url)
print(url)

## output: https://zh.wikipedia.org/zh-tw/百分号编码
def url_decoder(b):
    """Decode a percent-encoded URL string.

    Every ``%XX`` escape (``XX`` being two hexadecimal digits) is turned
    into the byte with that value, and the resulting byte sequence is
    decoded as UTF-8.  For example ``%21`` is 0x21 == 33 == ``"!"``, and
    ``%E7%99%BE`` is the three-byte UTF-8 encoding of ``"百"``.
    See https://zh.wikipedia.org/zh-tw/百分号编码.

    Args:
        b: The text to decode, as ``str``, or as ``bytes``/``bytearray``
            holding UTF-8 text.

    Returns:
        The decoded ``str``.  A ``%`` that is not followed by two hex
        digits is kept literally, and ``+`` is NOT translated to a space.

    Raises:
        UnicodeDecodeError: If the input bytes, or the bytes produced by
            the escapes, are not valid UTF-8.
    """
    if isinstance(b, (bytes, bytearray)):
        # Work on text so that one loop step is one character.
        b = b.decode("utf-8")

    hex_digits = "0123456789abcdefABCDEF"
    result = bytearray()
    pos = 0
    length = len(b)
    while pos < length:
        char = b[pos]
        if char == "%":
            pair = b[pos + 1:pos + 3]
            # Check the digits explicitly: int(..., 16) alone would also
            # accept strings such as "+1" or " 1".
            if (len(pair) == 2
                    and pair[0] in hex_digits
                    and pair[1] in hex_digits):
                result.append(int(pair, 16))
                pos += 3
                continue
            # Malformed or truncated escape: fall through, keep "%" as is.
        # Characters that are already decoded may be non-ASCII, so emit
        # their UTF-8 bytes rather than a single ord() value.
        result.extend(char.encode("utf-8"))
        pos += 1
    return result.decode(encoding="utf-8")
# Percent-encoding of the RFC 3986 reserved characters.
URL_RFC_3986 = {
    "!": "%21", "#": "%23", "$": "%24", "&": "%26", "'": "%27", "(": "%28",
    ")": "%29", "*": "%2A", "+": "%2B", ",": "%2C", "/": "%2F", ":": "%3A",
    ";": "%3B", "=": "%3D", "?": "%3F", "@": "%40", "[": "%5B", "]": "%5D",
    # Space is not a reserved character, but it must never appear raw in a
    # URL.  It is written as %20 rather than "+", because url_decoder does
    # not translate "+" back into a space.
    " ": "%20",
}

# Characters RFC 3986 allows to appear unescaped anywhere in a URI.
_URL_UNRESERVED = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-._~"
)


def url_encoder(b):
    """Percent-encode a string for use inside a URL component.

    Unreserved characters (ASCII letters, digits and ``-._~``) are copied
    unchanged.  Every other character, including the reserved characters
    listed in ``URL_RFC_3986``, the space and a literal ``%``, is replaced
    by the ``%XX`` escapes (upper-case hex) of its UTF-8 bytes.
    See https://zh.wikipedia.org/wiki/百分号编码.

    Because delimiters such as ``:`` and ``/`` are escaped too, pass in a
    single component (a path segment, a query value, ...) rather than a
    whole URL whose structure must be preserved.

    Args:
        b: The text to encode, as ``str``, or as ``bytes``/``bytearray``
            holding UTF-8 text.

    Returns:
        The encoded, pure-ASCII ``str``.

    Raises:
        UnicodeDecodeError: If a bytes input is not valid UTF-8.
    """
    if isinstance(b, (bytes, bytearray)):
        # Work on text so that one loop step is one character.
        b = b.decode(encoding="utf-8")

    pieces = []
    for char in b:
        if char in _URL_UNRESERVED:
            pieces.append(char)
        elif char in URL_RFC_3986:
            pieces.append(URL_RFC_3986[char])
        else:
            # "%", other ASCII punctuation, control characters and all
            # non-ASCII text: one zero-padded %XX escape per UTF-8 byte.
            pieces.extend(
                "%{:02X}".format(byte) for byte in char.encode("utf-8")
            )
    return "".join(pieces)

# print(url_encoder("我好棒==%%0.0:)"))
# ==> '%E6%88%91%E5%A5%BD%E6%A3%92%3D%3D%25%250.0%3A%29'

Releases

No releases published

Packages

No packages published

Languages