-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathscraper.rb
68 lines (57 loc) · 1.5 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
require "httparty"
require "nokogiri"
# Fetch the sfbay.craigslist.org "computers" search page and print the href
# of every anchor that is a direct child of an `li.result-row` element.
page = Nokogiri::HTML(HTTParty.get("https://sfbay.craigslist.org/d/computers/search/sya"))

# Node#[] returns nil when the attribute is absent, so `compact` drops anchors
# without an href instead of raising NoMethodError on `nil.value`.
urls = page.css("li.result-row > a").map { |link| link["href"] }.compact
p urls
# class Scraper
# def initialize
# @URL = "https://example.com"
# @JOBS = []
# end
# def get_all_data
# num = 1
# page = scrape_page(@URL)
# per_page = page.css('div.listingCard').count
# total = set_total(page)
# last_page = set_last_page(total, per_page)
# loop_pages(num, last_page)
# end
# def scrape_page(url)
# scrape = Nokogiri::HTML(HTTParty.get(url))
# end
# def set_total(data)
# data.css('div.job-count').text.split(' ')[1].gsub(',','').to_i
# end
# def set_last_page(total, per_page)
# (total.to_f/per_page.to_f).round
# end
# def loop_pages(num, last_page)
# while num <= last_page
# handle_pagination(num)
# num += 1
# end
# end
# def handle_pagination(num)
# url = "#{@URL}/listings?page=#{num}"
# jobs = scrape_page(url).css('div.listingCard')
# build_jobs(jobs)
# end
# def build_jobs(jobs)
# jobs.each do |j|
# job = build_job(j)
# @JOBS << job
# end
# end
# def build_job(job)
# result = {
# title: job.css('span.job-title').text,
# company: job.css('span.company').text,
# location: job.css('span.location').text,
# url: "#{@URL}/#{job.css('a')[0].attributes['href'].value}"
# }
# result
# end
# end