You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
tea/sync.py

123 lines
5.3 KiB

import json
from pathlib import Path

import django
import requests
from bs4 import BeautifulSoup
from bs4 import Tag
from django.conf import settings

from choose.utils import get_extension
# Directory that contains this script; the SQLite file is resolved against it.
BASE_DIR = Path(__file__).resolve().parent

# Stand-alone Django configuration: the script configures settings itself
# instead of loading a project settings module, so the ORM can be used when
# the file is run directly.
_STANDALONE_SETTINGS = {
    'DATABASES': {
        'default': {
            'ENGINE': 'django.db.backends.sqlite3',
            'NAME': BASE_DIR / 'db.sqlite3',
        },
    },
    'INSTALLED_APPS': [
        'django.contrib.admin',
        'django.contrib.auth',
        'django.contrib.contenttypes',
        'django.contrib.sessions',
        'django.contrib.messages',
        'django.contrib.staticfiles',
        'choose',
    ],
}

settings.configure(**_STANDALONE_SETTINGS)
django.setup()

# The models are imported only after django.setup(): Django requires the app
# registry to be populated before model classes can be imported.
from choose.models import TeaType, Tea, TeaCategory, Pic
# ---------------------------------------------------------------------------
# Step 1: read the catalogue menu on the shop's front page and make sure every
# sub-entry of a known category exists as a TeaType row.
# ---------------------------------------------------------------------------


def _trailing_path(url, segments):
    """Return the last ``segments`` path components of ``url`` joined by '/'.

    Trailing slashes are ignored, so the result does not depend on whether
    the link ends with '/'.
    """
    return '/'.join(url.rstrip('/').split('/')[-segments:])


# Category page URL -> TeaCategory row. Only menu entries whose link matches
# one of these URLs are processed; looking the row up by URL avoids a second
# query keyed on the menu's display text.
db_cats = list(TeaCategory.objects.all())
categories = {
    'https://chainiisvet.ru/product-category/' + x.shop_name + '/': x
    for x in db_cats
}

# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get("https://chainiisvet.ru/", timeout=30)
soup = BeautifulSoup(r.content, features="html.parser")
ul = soup.find("ul", {"id": "menu-katalog"})
if ul is None:
    raise RuntimeError(
        "catalogue menu (ul#menu-katalog) was not found on https://chainiisvet.ru/")

for item in ul.contents:
    # Skip text nodes between the menu entries.
    if not isinstance(item, Tag):
        continue
    category = categories.get(item.contents[0].attrs.get('href'))
    if category is None:
        continue
    # NOTE(review): the third child of the entry is assumed to be the nested
    # sub-menu list -- this mirrors the positional access of the original code.
    for i in item.contents[2]:
        if not isinstance(i, Tag):
            continue
        link = i.contents[0]
        ttype = link.contents[0]
        href = link.attrs['href']
        if not href.startswith('https'):
            href = 'https://chainiisvet.ru' + href
        # Links containing 'shop' are stored with one_item=True and keep the
        # last two path components as shop_name; all other links keep only
        # the last component.
        one_item = 'shop' in href
        _, created = TeaType.objects.get_or_create(
            shop_name=_trailing_path(href, 2 if one_item else 1),
            name=ttype,
            category=category,
            one_item=one_item)
        print(created)
        print(category, end=':')
        print(ttype, end=':')
        print(href)
# ---------------------------------------------------------------------------
# Step 2: for every TeaType, scrape its page(s) and create or update the
# matching Tea rows together with their pictures.
# ---------------------------------------------------------------------------


def _fetch_soup(url):
    """Download ``url`` and return it parsed with the built-in HTML parser."""
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    return BeautifulSoup(response.content, features="html.parser")


def _get_or_download_pic(href):
    """Return the Pic row for ``href``; download the image when the row is new.

    The image is written to ``media/<pic id>.<extension>`` relative to the
    current working directory.
    """
    pic, created = Pic.objects.get_or_create(href=href)
    if created:
        response = requests.get(href, timeout=30)
        with open('media/' + str(pic.id) + '.' + get_extension(href), 'wb') as f:
            f.write(response.content)
    return pic


def _parse_price(text):
    """Convert a price string to an int by dropping '.' and ',' separators."""
    return int(str(text).replace(".", "").replace(",", ""))


def _upsert_tea(name, tea_type, price, pic):
    """Create the Tea identified by (name, type) or update its price/picture."""
    tea = Tea.objects.filter(name=name, type=tea_type).first()
    if tea is None:
        Tea.objects.create(name=name, type=tea_type, price=price, pic=pic)
        return
    changed = False
    if tea.price != price:
        tea.price = price
        changed = True
    if tea.pic != pic:
        tea.pic = pic
        changed = True
    # Save once, after all fields are compared, so that a picture change is
    # persisted as well (previously only price changes reached the database).
    if changed:
        tea.save()


for tea_type in TeaType.objects.all():
    if tea_type.one_item:
        # The type links straight to a single product page.
        soup = _fetch_soup(
            "https://chainiisvet.ru/shop/" + tea_type.shop_name + '/')
        href = str(soup.find_all("img", {"class": "attachment-shop_single"})[0].attrs['src'])
        pic = _get_or_download_pic(href)
        name = str(soup.find_all("h1", {"class": "product_title"})[0].contents[0])
        variation_forms = soup.find_all("form", {"class": "variations_form cart"})
        if variation_forms:
            # The price of the first variation is taken from the JSON stored
            # in the form's data attribute. It is multiplied by 100 and
            # rounded so a float result such as 34998.999... is not truncated.
            variations = json.loads(variation_forms[0].attrs['data-product_variations'])
            price = round(variations[0]['display_price'] * 100)
        else:
            # NOTE(review): positional walk through the summary block, kept
            # exactly as in the original code -- verify against the page markup.
            summary = soup.find_all("div", {"class": 'entry-summary'})[0]
            price = _parse_price(summary.contents[3].contents[0].contents[0])
        _upsert_tea(name, tea_type, price, pic)
    else:
        # The type is a category listing with several products on the page.
        soup = _fetch_soup(
            "https://chainiisvet.ru/product-category/"
            + tea_type.category.shop_name + '/' + tea_type.shop_name + '/')
        ul = soup.find_all("ul", {"class": "products"})[0]
        for item in ul.contents:
            # Skip text nodes between the product entries.
            if not isinstance(item, Tag):
                continue
            # NOTE(review): child positions (1, 2, 6) are kept from the
            # original code -- verify against the listing markup.
            contents = item.contents[1].contents
            href = str(contents[1].attrs['data-src'])
            pic = _get_or_download_pic(href)
            # The name is the concatenation of the non-tag children only.
            name = ''.join(
                str(i) for i in contents[2] if not isinstance(i, Tag))
            price = _parse_price(contents[6].contents[1].contents[0])
            print(name)
            _upsert_tea(name, tea_type, price, pic)