88 changes: 88 additions & 0 deletions moodico/products/utils/scraper.py
@@ -0,0 +1,88 @@
# utils/scraper.py
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import sys

def _build_chrome_driver():
    # Chrome config
    options = webdriver.ChromeOptions()
    options.add_argument("--headless=new")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
    )

    # Linux (Ubuntu) deployment server
    if sys.platform.startswith("linux"):
        options.binary_location = "/usr/bin/chromium-browser"
        service = Service("/usr/bin/chromedriver")

        return webdriver.Chrome(service=service, options=options)

    # macOS/Windows or fallback -> use webdriver_manager
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

def scrape_oliveyoung_products(max_items=10):
    driver = _build_chrome_driver()
    products = []

    try:
        # Target
        url = (
            "https://www.oliveyoung.co.kr/store/main/getBestList.do"
            "?dispCatNo=900000100100001&fltDispCatNo=10000010002&pageIdx=1&rowsPerPage=10"
        )
        driver.get(url)

        wait = WebDriverWait(driver, 5)  # wait up to 5 seconds for the item list to load
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "ul.cate_prd_list")))
        items = driver.find_elements(By.CSS_SELECTOR, "ul.cate_prd_list li")  # all <li> items

        for item in items[:max_items]:
            try:
                # product info
                prd_info = item.find_element(By.CSS_SELECTOR, "div.prd_info")
                link_tag = prd_info.find_element(By.CSS_SELECTOR, "a.prd_thumb")
                product_url = link_tag.get_attribute("href")
                # product image
                img_tag = link_tag.find_element(By.TAG_NAME, "img")
                image_src = img_tag.get_attribute("src") or img_tag.get_attribute("data-original") or ""
                image_alt = img_tag.get_attribute("alt") or ""

                brand_name = prd_info.find_element(By.CSS_SELECTOR, "span.tx_brand").text.strip()
                product_name = prd_info.find_element(By.CSS_SELECTOR, "p.tx_name").text.strip()
                price_original = prd_info.find_element(By.CSS_SELECTOR, "p.prd_price span.tx_org").text.strip()

                # product flags (promotion tags)
                flags = []
                try:
                    flag_spans = prd_info.find_elements(By.CSS_SELECTOR, "p.prd_flag span.icon_flag")
                    flags = [flag.text.strip() for flag in flag_spans if flag.text.strip()]
                except Exception:
                    pass

                products.append({
                    "product_url": product_url,
                    "brand_name": brand_name,
                    "product_name": product_name,
                    "image_src": image_src,
                    "image_alt": image_alt,
                    "price_original": price_original,
                    "flags": flags,
                })

                if len(products) >= max_items:
                    break
            except Exception:
                continue
    finally:
        driver.quit()

    return products
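A minimal way to exercise the new scraper from a Django shell (an illustrative sketch, not part of this diff; it uses only names the file above defines):

from moodico.products.utils.scraper import scrape_oliveyoung_products

# Fetch the first three best-list items and print a quick summary.
products = scrape_oliveyoung_products(max_items=3)
for p in products:
    print(p["brand_name"], p["product_name"], p["price_original"], p["flags"])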
35 changes: 0 additions & 35 deletions moodico/products/views.py
@@ -55,7 +55,6 @@ def product_detail(request, product_id):
    return render(request, 'products/detail.html', {'product': product})

def crawled_product_detail(request, crawled_id):
    # crawled_id -> a4c0a977-cced-4ce8-abea-f718dcff8325
    """Detail page view for a crawled product."""
    try:
        logger.info(f"Crawled product detail page requested: crawled_id = {crawled_id}")
@@ -71,7 +70,6 @@ def crawled_product_detail(request, crawled_id):

        for p in products:
            if p.get('id') == crawled_id:
                # p.get('id') -> a4c0a977-cced-4ce8-abea-f718dcff8325
                product = p
                break
        print('...', crawled_id)
@@ -92,39 +90,6 @@ def crawled_product_detail(request, crawled_id):
        average_rating = all_reviews.aggregate(avg=Avg('rating')).get('avg') or 0
        average_rating = round(average_rating, 2)

        # if not product:
        #     return render(request, 'products/detail.html', {
        #         'error': '제품을 찾을 수 없습니다.',
        #         'product': None
        #     })

        # # fetch the user's review info for this product
        # from moodico.users.utils import get_user_from_request
        # user = get_user_from_request(request)

        # # look up this user's review by product ID (using crawled_id)
        # user_review = None
        # print('..', ProductRating.objects.all())
        # if user:
        #     try:
        #         user_review = ProductRating.objects.get(
        #             user=user,
        #             product_id=crawled_id
        #         )
        #     except ProductRating.DoesNotExist:
        #         pass

        # # fetch all reviews for the product
        # # [<ProductRating: zin - 롬앤 더 쥬시 래스팅 틴트 / 02 누카다미아 (4점)>]>
        # all_reviews = ProductRating.objects.filter(product_id=crawled_id).order_by('-created_at')

        # # compute the average rating and the number of ratings
        # total_ratings = all_reviews.count()
        # if total_ratings > 0:
        #     total_score = sum(review.rating for review in all_reviews)
        #     average_rating = round(total_score / total_ratings, 1)
        # else:
        #     average_rating = 0.0
        context = {
            'product': product,
            'user_review': user_review,
73 changes: 40 additions & 33 deletions moodico/recommendation/views.py
@@ -8,53 +8,60 @@
from sklearn.metrics.pairwise import cosine_similarity

# Create your views here.
# def my_item_recommendation(request):
#     # Get recommended or default products
#     search_results = get_top_liked_products(limit=10)
#     recommended_items = []  # Set this if you want a separate recommended section
#     print("....", search_results)

#     return render(
#         request,
#         'upload/upload.html',
#         {
#             'search_results': search_results,
#             'recommended_items': recommended_items
#         }
#     )

def get_recommendation_list():
    # parse the JSON data (in practice this could come from a DB or an API)
    products_path = 'static/data/advertise_products.json'
    with open(products_path, 'r', encoding='utf-8') as f:
        raw_data = json.load(f)

    # example tag-extraction rule (use the first flag, or None)
from moodico.products.utils.scraper import scrape_oliveyoung_products
import time
from django.core.cache import cache

CACHE_KEY = "oliveyoung_bestlist_v1"
CACHE_TTL = 60 * 60 * 24 # 24 hours

def make_search_results(raw_data):
    def get_tag(flags):
        for tag in ['글로시', 'matte', 'glossy', '증정', '세일', '쿠폰', '오늘드림']:
            if tag in flags:
                return tag
        return flags[0] if flags else '-'

    search_results = [
    return [
        {
            "brand": item["brand_name"],
            "name": item["product_name"],
            "image": item["image_src"],
            "price": item["price_original"].replace("~", ""),
            "brand": item.get("brand_name", ""),
            "name": item.get("product_name", ""),
            "image": item.get("image_src", ""),
            "price": (item.get("price_original", "") or "").replace("~", ""),
            "tag": get_tag(item.get("flags", [])),
            "url": item["product_url"],
            "url": item.get("product_url", ""),
        }
        for item in raw_data
    ]
    return search_results

def get_recommendation_list(force_refresh=False):
    cached = cache.get(CACHE_KEY)
    # refresh when there is no cached payload or an explicit refresh was requested
    if (not cached) or force_refresh:
        raw_data = scrape_oliveyoung_products()
        search_results = make_search_results(raw_data)
        payload = {
            "results": search_results,
            "fetched_at": int(time.time()),  # records when the data was last refreshed
        }
        cache.set(CACHE_KEY, payload, CACHE_TTL)
        return payload

    return cached

def my_item_recommendation(request):
    search_results = get_recommendation_list()
    return render(request, 'upload/upload.html', {
        "search_results": search_results
    })
    # to trigger a refresh manually: /?refresh=1
    force = request.GET.get("refresh") == "1"
    data = get_recommendation_list(force_refresh=force)

    return render(
        request,
        "upload/upload.html",
        {
            "search_results": data["results"],
            "fetched_at": data["fetched_at"],
        },
    )

@csrf_exempt
def recommend_by_color(request):
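A minimal sketch of exercising the new cache path from a Django shell (illustrative only; it assumes a working cache backend is configured for the project, and uses only names introduced in this diff):

from django.core.cache import cache
from moodico.recommendation.views import CACHE_KEY, get_recommendation_list

payload = get_recommendation_list()                     # first call scrapes and caches for 24 hours
print(payload["fetched_at"])                            # Unix timestamp of the last refresh
payload = get_recommendation_list()                     # subsequent calls return the cached payload
payload = get_recommendation_list(force_refresh=True)   # re-scrape and overwrite the cache
cache.delete(CACHE_KEY)                                 # or drop the cached payload entirely

Over HTTP, the same forced refresh is reachable by appending ?refresh=1 to the view's URL.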