https://docs.python.org/ko/3/library/urllib.request.html#module-urllib.request
ํ์ด์ฌ์ ๊ธฐ๋ณธ๋ชจ๋์ธ urllib์ requestํจ์๋ฅผ ํตํด HTTP์ ๋ณด๋ฅผ ์์ , ์ฝ๊ธฐ๊ฐ ๊ฐ๋ฅํ๋ค.
ex) ์ด๋ฏธ์งํ์ผ, html(ํ์ด์ง ์์ค)
*request.urlretrieve(url , 파일경로)
# urllib
# Receive HTTP data: download an image and an HTML page with urlretrieve.
import urllib.request as req

# URLs of the resources to download
image_url = 'http://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&fname=https%3A%2F%2Fk.kakaocdn.net%2Fdn%2F18CAR%2FbtqzK1rSB5g%2FXR4oK6MYIBHJEVPj7rgYBk%2Fimg.png'
html_url = 'http://google.com'

# Paths the downloads are saved to
# NOTE(review): these look like a path separator may have been lost
# (e.g. '../test1.jpg'); left unchanged -- confirm the intended location.
save_path1 = '..test1.jpg'
save_path2 = '..index.html'

try:
    # urlretrieve stores the resource at the given path and returns a
    # (local file path, response headers) tuple.
    # The original passed the undefined name `img_url` here, so the script
    # always fell into the except branch with a NameError.
    file1, header1 = req.urlretrieve(image_url, save_path1)
    file2, header2 = req.urlretrieve(html_url, save_path2)
except Exception as e:
    # Script-level boundary: report the failure instead of a traceback.
    print("Download failed")
    print(e)
else:
    # Runs only when both downloads succeeded, so file*/header* are bound.
    print(header1)
    print(header2)

    # Downloaded file information
    print('Filename1 {}'.format(file1))
    print('Filename2 {}'.format(file2))
    print()

    print("Download Succeed")
urlretrieve : ์ง์ ํด๋ url์์ ํ์ผ์ ์ ์ฅํ ๋ค , ํค๋์ ๋ณด์ ํ์ผ๊ฒฝ๋ก๋ฅผ ๋ฆฌํดํ๋ค.
ํค๋ ์ ๋ณด์ ํ์ผ์ ๋ณด ์ถ๋ ฅ๋ด์ฉ |
Date: Tue, 25 Feb 2020 16:35:31 GMT Server: PWS/8.3.2.7 X-Px: ms h0-s378.p63-icn ( h0-s411.p63-icn), rf-ht h0-s411.p63-icn ( h0-s776.p61-icn), rf-ht h0-s776.p61-icn ( origin) Age: 0 Cache-Control: max-age=7200 Expires: Tue, 25 Feb 2020 18:35:31 GMT Accept-Ranges: bytes Content-Length: 49119 Content-Type: image/png Last-Modified: Mon, 06 Jan 2020 13:17:56 GMT Connection: close
Date: Tue, 25 Feb 2020 16:35:31 GMT Expires: -1 Cache-Control: private, max-age=0 Content-Type: text/html; charset=ISO-8859-1 P3P: CP="This is not a P3P policy! See g.co/p3phelp for more info." Server: gws X-XSS-Protection: 0 X-Frame-Options: SAMEORIGIN Set-Cookie: 1P_JAR=2020-02-25-16; expires=Thu, 26-Mar-2020 16:35:31 GMT; path=/; domain=.google.com; Secure Set-Cookie: NID=198=uijrh2ejBDB4SLoz54AmFcJpl4FJcYZyo9enSWgkb7YBaR7dx1U1kKXxiTdTETgQ63hh--lXDK7ophlQKkUwAL7nUR6BgfGQ7V_RyBJxgam6M_2124ap4mWG1NoE_dDQq5AZoC3Jqxb33CVo-oY0DsXcaZSf1_klbQfRgB0sTB4; expires=Wed, 26-Aug-2020 16:35:31 GMT; path=/; domain=.google.com; HttpOnly Accept-Ranges: none Vary: Accept-Encoding Connection: close
Filename1 ..test1.jpg Filename2 ..index.html |
*urlopen์ ์ด์ฉํ ๋ค์ด๋ก๋ ๋ฐ Http error , URL error ์์ธ์ฒ๋ฆฌ
# urlopen
# Download resources with urlopen, handling HTTPError and URLError per URL.
import urllib.request as req
from urllib.error import URLError, HTTPError

# Download destinations (path and file name), index-aligned with target_url
path_list = ["..test2.jpg", "..index2.html"]

# URLs of the resources to download
# NOTE(review): "tisotry" in the second URL looks like a misspelling of
# "tistory"; it may be deliberate to demonstrate the URLError branch -- confirm.
target_url = ["https://movie-phinf.pstatic.net/20190625_168/1561426986010A3uBi_JPEG/movie_image.jpg", "http://infinitt.tisotry.com"]

for i, url in enumerate(target_url):
    # Handle failures per URL so one bad download does not stop the loop.
    try:
        # Open the URL; the context manager closes the connection even if
        # reading the response fails.
        with req.urlopen(url) as response:
            # Response body as bytes
            contents = response.read()

            print("--------------1----------------")

            # Status information (200 means OK)
            print("Header Info-{} : {}". format(i, response.info()))
            print("HTTP Status Code: {}".format(response.getcode()))
            print()
            print("---------------2---------------")

        # Write the body unchanged to the matching destination ('wb' = binary).
        with open(path_list[i], 'wb') as c:
            c.write(contents)
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be caught first.
        print("Download Failed.")
        print("HTTPError Code:", e.code)
    except URLError as u:
        print("Download Failed.")
        # The original read `e.reason` here, but `e` is not bound in this
        # handler, which raised NameError instead of reporting the reason.
        print("URL Error Reason:", u.reason)
    else:
        print()
        print("Download Succeed.")