-
Notifications
You must be signed in to change notification settings - Fork 0
/
download.py
39 lines (33 loc) · 1.33 KB
/
download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
from urllib.parse import urljoin

import requests
from pyquery import PyQuery as PQ
def get_alllinks(url):
    """Collect the ``href`` attribute of each content link on the page at *url*.

    The page is fetched with ``requests`` and parsed with PyQuery. Only
    anchors matched by the CSS selector
    ``.paragraphs-item-content-text .content a`` are considered.

    Args:
        url: Address of the page to scan.

    Returns:
        A list with one entry per matched anchor, in document order. Each
        entry is whatever ``attr("href")`` yields for that anchor
        (presumably ``None`` when the attribute is absent -- the values are
        not filtered here).
    """
    document = PQ(requests.get(url).content)
    anchors = document(".paragraphs-item-content-text .content a")
    return [PQ(anchor).attr("href") for anchor in anchors]
def download_files(url, folder):
    """Download every file linked from the table on the page at *url*.

    The page is fetched with ``requests`` and parsed with PyQuery. Each
    anchor matched by ``.content table td a`` is downloaded in chunks and
    saved as ``<folder>/<link text>``.

    Args:
        url: Address of the index page that lists the files.
        folder: Destination directory; it is created (including missing
            parent directories) when it does not exist yet.

    Raises:
        OSError: If *folder* cannot be created or a file cannot be written.

    Errors raised by ``requests`` while fetching the page or a file are not
    caught and propagate to the caller.
    """
    # makedirs copes with nested paths such as "dataset/normal" and with an
    # already existing directory, while still surfacing real failures
    # (permissions, a file in the way) instead of swallowing them.
    os.makedirs(folder, exist_ok=True)

    page = PQ(requests.get(url).content)
    for anchor in page(".content table td a"):
        link = PQ(anchor)
        name = link.text()
        href = link.attr("href")
        # An anchor without a target has nothing to fetch, and one without
        # text gives no file name (joining "" would point at the folder).
        if not href or not name:
            continue

        # Resolve relative hrefs against the index page; absolute hrefs are
        # returned unchanged by urljoin.
        file_url = urljoin(url, href)

        # The context manager releases the streamed connection even if
        # writing the file fails part-way through.
        with requests.get(file_url, stream=True) as resp:
            print(f"downloading {name} from {file_url}")
            if not resp.ok:
                # Do not store an HTTP error page under a data-file name.
                print(f"skipping {name}: HTTP {resp.status_code}")
                continue
            # Mode must be exactly "wb"; a trailing space is rejected by
            # open() with "invalid mode".
            with open(os.path.join(folder, name), "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    f.write(chunk)
if __name__ == "__main__":
    # (index page, destination folder) pairs, processed in the listed order.
    jobs = (
        ("https://engineering.case.edu/bearingdatacenter/normal-baseline-data", "dataset/normal"),
        ("https://engineering.case.edu/bearingdatacenter/12k-drive-end-bearing-fault-data", "dataset/12KDriveEnd"),
        ("https://engineering.case.edu/bearingdatacenter/48k-drive-end-bearing-fault-data", "dataset/48KDriveEnd"),
        ("https://engineering.case.edu/bearingdatacenter/12k-fan-end-bearing-fault-data", "dataset/FanEnd"),
    )
    for page_url, target_dir in jobs:
        download_files(page_url, target_dir)