Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

이벤트 크롤링 #52

Merged
merged 5 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/main/java/com/sickgyun/server/event/domain/Event.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package com.sickgyun.server.event.domain;

import java.time.YearMonth;

import jakarta.persistence.Entity;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;

/**
 * JPA entity holding one event listing.
 *
 * <p>Instances are created through the public constructor; the protected no-args
 * constructor generated by Lombok exists for the persistence provider.
 */
@Entity
@NoArgsConstructor(access = AccessLevel.PROTECTED)
public class Event {

    /** Primary key, generated with the {@code IDENTITY} strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Image reference for the event, stored exactly as supplied. */
    private String image;

    /** Event title. */
    private String name;

    /** Name of the event host. */
    private String host;

    /** Event date text, kept as an unparsed string. */
    private String date;

    /** Hashtags of the event as a single string. */
    private String hashTags;

    /** Year component of the {@link YearMonth} given at construction. */
    private int year;

    /** Month-of-year component (1-12) of the {@link YearMonth} given at construction. */
    private int month;

    /**
     * Creates an event from its already-extracted attributes.
     *
     * @param image     image reference
     * @param name      event title
     * @param host      event host
     * @param date      event date text
     * @param hashTags  hashtags as a single string
     * @param yearMonth year and month this event is filed under; must not be {@code null}
     */
    public Event(String image, String name, String host, String date, String hashTags, YearMonth yearMonth) {
        this.year = yearMonth.getYear();
        this.month = yearMonth.getMonthValue();
        this.image = image;
        this.name = name;
        this.host = host;
        this.date = date;
        this.hashTags = hashTags;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.sickgyun.server.event.domain.repository;

import org.springframework.data.jpa.repository.JpaRepository;

import com.sickgyun.server.event.domain.Event;

/**
 * Spring Data JPA repository for {@link Event} entities, keyed by their {@code Long} id.
 *
 * <p>Declares no custom queries; only the operations inherited from
 * {@link JpaRepository} are available.
 */
public interface EventRepository extends JpaRepository<Event, Long> {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.sickgyun.server.event.exception;

import org.springframework.http.HttpStatus;

import com.sickgyun.server.common.exception.SickgyunException;

/**
 * Raised when the event crawler cannot connect to the event source.
 *
 * <p>Always carries {@link HttpStatus#INTERNAL_SERVER_ERROR} and a fixed message.
 */
public class EventUrlConnectingError extends SickgyunException {

    /** Fixed message passed to the parent exception. */
    private static final String MESSAGE = "이벤트 정보 크롤링 도중 서버 연결에 실패하였습니다.";

    /** Creates the exception with the fixed status and message. */
    public EventUrlConnectingError() {
        super(HttpStatus.INTERNAL_SERVER_ERROR, MESSAGE);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package com.sickgyun.server.event.scheduler;

import java.io.IOException;
import java.time.YearMonth;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.stream.Stream;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;

import com.sickgyun.server.event.domain.Event;
import com.sickgyun.server.event.domain.repository.EventRepository;
import com.sickgyun.server.event.exception.EventUrlConnectingError;

import lombok.RequiredArgsConstructor;

@Component
@RequiredArgsConstructor
public class EventScheduler {

@Value("${event.url}")
private String url;
private final EventRepository eventRepository;

@Scheduled(cron = "0 0 5 1 * ?") // 매달 1일 5시
@Transactional
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

크롤링 되는 날은 한 번 얘기해봐야 할 거 같아요. 만약 2월 중순에 시작해서 2월 말에 끝나는 대회 정보 같은 건 확인 할 수 없어요....

/**
 * Crawls the events of the current month and of the following month, then
 * replaces every stored {@link Event} with the freshly crawled ones.
 *
 * <p>If the crawl yields no events at all, the stored events are left untouched.
 * An empty result most likely means the page markup changed and nothing could be
 * parsed, and wiping the table in that case would leave the service with no data.
 *
 * @throws EventUrlConnectingError if either month's page cannot be fetched;
 *                                 nothing is deleted or saved in that case
 */
public void getReqruitInformation() {
    YearMonth thisMonth = YearMonth.now();

    // Fetch order is preserved: current month first, then next month.
    List<Event> crawledEvents = Stream.of(thisMonth, thisMonth.plusMonths(1))
        .map(this::getEventsByMonth)
        .flatMap(Collection::stream)
        .toList();

    // Do not replace existing data with nothing.
    if (crawledEvents.isEmpty()) {
        return;
    }

    eventRepository.deleteAll();
    eventRepository.saveAll(crawledEvents);
}

/**
 * Fetches the event page for the given month and converts its event items
 * into {@link Event} entities.
 *
 * @param date the year and month to crawl
 * @return the events parsed from that month's page
 */
private List<Event> getEventsByMonth(YearMonth date) {
    // fetch page -> pick the event item elements -> map them to entities
    return getAllEvents(getRawEvents(connectToServer(date)), date);
}

/**
 * Converts the raw event item elements of one month's page into {@link Event} entities.
 *
 * @param rawEvents the event item elements selected from the page
 * @param yearMonth the year and month every resulting event is filed under
 * @return one {@link Event} per element, in document order
 * @throws IndexOutOfBoundsException if an item contains fewer than three {@code <img>} elements
 */
private static List<Event> getAllEvents(Elements rawEvents, YearMonth yearMonth) {
    List<Event> events = new ArrayList<>();

    for (Element rawEvent : rawEvents) {
        // NOTE(review): assumes the third <img> of an item is the event thumbnail — confirm against the page markup.
        String imageSrc = "https://dev-event.vercel.app/" + rawEvent.select("img").get(2).attr("src");
        String name = rawEvent.getElementsByClass("Item_item__content__title___fPQa").text();
        String host = rawEvent.getElementsByClass("Item_host__zNXMy").text();
        String date = rawEvent.getElementsByClass("Item_date__kVMJZ").text();
        String hashtags = normalizeHashTags(rawEvent.getElementsByClass("Item_tags___ujeV").text());

        events.add(new Event(imageSrc, name, host, date, hashtags, yearMonth));
    }
    return events;
}

/**
 * Normalizes hashtag text to the form {@code "#a #b #c"}: all spaces are removed,
 * then a single space is inserted before every {@code #}, and surrounding
 * whitespace is stripped.
 *
 * <p>Stripping (instead of cutting the first character) keeps an empty tag list
 * from throwing and keeps the first character when the text does not start with {@code #}.
 *
 * @param rawTags the tag text as extracted from the page; may be empty
 * @return the normalized hashtag string, empty if {@code rawTags} has no content
 */
private static String normalizeHashTags(String rawTags) {
    return rawTags
        .replace(" ", "")
        .replace("#", " #")
        .strip();
}

/**
 * Selects the event item elements from a fetched page.
 *
 * @param document the parsed page
 * @return all elements carrying the event item CSS class
 */
private static Elements getRawEvents(Document document) {
    Elements eventItems = document.getElementsByClass("Item_item__86e_I");
    return eventItems;
}

/**
 * Fetches and parses the event page for the given month.
 *
 * <p>The configured {@code url} is used as a format string and receives the year
 * and the month number (1-12), in that order.
 *
 * @param date the year and month whose page should be fetched
 * @return the parsed page
 * @throws EventUrlConnectingError if the page cannot be fetched; the underlying
 *                                 {@link IOException} is attached as its cause
 */
private Document connectToServer(YearMonth date) {
    String targetUrl = String.format(url, date.getYear(), date.getMonthValue());
    try {
        return Jsoup.connect(targetUrl).get();
    } catch (IOException e) {
        // The exception type has no cause-accepting constructor, so attach the
        // cause afterwards to keep the original failure in the stack trace.
        EventUrlConnectingError error = new EventUrlConnectingError();
        error.initCause(e);
        throw error;
    }
}
}
5 changes: 5 additions & 0 deletions src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ jwt:
---
#Reqruit
reqruit:
url: ${REQRUIT_URL}

---
#event
# NOTE(review): this reuses the REQRUIT_URL variable, so event.url resolves to the
# same value as reqruit.url — confirm this is intended and not a copy-paste slip.
# EventScheduler passes this value to String.format together with the year and the
# month, so it is expected to be a format string with two placeholders.
event:
url: ${REQRUIT_URL}
3 changes: 3 additions & 0 deletions src/test/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,7 @@ jwt:
refresh-token-expiration-time: 15

reqruit:
url: hahathisisurl

event:
url: hahathisisurl
Loading