"""Import the chainiisvet.ru catalogue into the ``choose`` app database.

The script runs in two passes:

1. Read the ``menu-katalog`` list on the site's front page and make sure a
   ``TeaType`` row exists for every sub-entry of each category that is
   already present in ``TeaCategory``.
2. For every ``TeaType``, fetch its product page (single-product types) or
   its category listing, then create or update the matching ``Tea`` rows
   and download pictures that have not been seen before.

Django is configured inline, so the script is meant to be run standalone.
Picture files are written to ``media/`` relative to the current working
directory.
"""
import json
from pathlib import Path

import django
import requests
from bs4 import BeautifulSoup, Tag
from django.conf import settings

from choose.utils import get_extension

BASE_DIR = Path(__file__).resolve().parent
SITE_URL = 'https://chainiisvet.ru'
# Seconds to wait for the server; without a timeout ``requests`` can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 30

settings.configure(
    DATABASES={
        'default': {
            'ENGINE': 'django.db.backends.sqlite3',
            'NAME': BASE_DIR / 'db.sqlite3',
        }
    },
    INSTALLED_APPS=[
        'django.contrib.admin',
        'django.contrib.auth',
        'django.contrib.contenttypes',
        'django.contrib.sessions',
        'django.contrib.messages',
        'django.contrib.staticfiles',
        'choose'
    ]
)
django.setup()

# Models can only be imported once the app registry has been set up.
from choose.models import TeaType, Tea, TeaCategory, Pic


def _fetch_soup(url):
    """Download *url* and return its body parsed with ``html.parser``."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.content, features="html.parser")


def _trailing_segments(url, count):
    """Return the last *count* '/'-separated pieces of *url*.

    The final character of *url* is always dropped, so the URL is expected
    to end with a slash: ``_trailing_segments('https://h/a/b/c/', 2)`` gives
    ``'b/c'``.
    """
    start = len(url) - 1
    for _ in range(count):
        start = url.rfind('/', 0, start)
    return url[start + 1:-1]


def _parse_price(text):
    """Turn a price string into an int by removing '.' and ',' separators."""
    return int(str(text).replace(".", "").replace(",", ""))


def _get_pic(href):
    """Return the ``Pic`` row for *href*, downloading the file if it is new.

    The image is stored as ``media/<pic id>.<extension>`` and is fetched only
    when the row has just been created.
    """
    pic, created = Pic.objects.get_or_create(href=href)
    if created:
        response = requests.get(href, timeout=REQUEST_TIMEOUT)
        with open('media/' + str(pic.id) + '.' + get_extension(href), 'wb') as f:
            f.write(response.content)
    return pic


def _upsert_tea(name, tea_type, price, pic):
    """Create the ``Tea`` identified by (*name*, *tea_type*) or refresh it.

    An existing row gets its price and picture updated when they differ. The
    row is saved once after all changes, so a picture change is persisted
    too (previously it was assigned after the save and silently lost).
    """
    tea = Tea.objects.filter(name=name, type=tea_type).first()
    if tea is None:
        Tea.objects.create(name=name, type=tea_type, price=price, pic=pic)
        return
    changed = False
    if tea.price != price:
        tea.price = price
        changed = True
    if tea.pic != pic:
        tea.pic = pic
        changed = True
    if changed:
        tea.save()


# --- Pass 1: register tea types found in the catalogue menu ---------------

db_cats = list(TeaCategory.objects.all())
categories = ['https://chainiisvet.ru/product-category/' + x.shop_name + '/' for x in db_cats]

soup = _fetch_soup(SITE_URL + '/')
ul = soup.find("ul", {"id": "menu-katalog"})
for item in ul.contents:
    # Whitespace between tags shows up as text nodes; only tags are entries.
    if not isinstance(item, Tag):
        continue
    if item.contents[0].attrs['href'] not in categories:
        continue
    category = TeaCategory.objects.get(name=item.contents[0].contents[0])
    # The third child of a menu entry holds its sub-menu.
    for i in item.contents[2]:
        if not isinstance(i, Tag):
            continue
        link = i.contents[0]
        ttype = link.contents[0]
        href = link.attrs['href']
        if not href.startswith('https'):
            href = SITE_URL + href
        # Links containing 'shop' point at a single product page; their slug
        # keeps the last two path segments, other links keep only the last.
        one_item = 'shop' in href
        shop_name = _trailing_segments(href, 2 if one_item else 1)
        _, created = TeaType.objects.get_or_create(
            shop_name=shop_name, name=ttype, category=category, one_item=one_item)
        print(created)
        print(category, end=':')
        print(ttype, end=':')
        print(href)

# --- Pass 2: create or update the teas of every tea type ------------------

for tea_type in TeaType.objects.all():
    if tea_type.one_item:
        soup = _fetch_soup(SITE_URL + "/shop/" + tea_type.shop_name + '/')
        href = str(soup.find_all("img", {"class": "attachment-shop_single"})[0].attrs['src'])
        pic = _get_pic(href)
        name = str(soup.find_all("h1", {"class": "product_title"})[0].contents[0])
        variation_forms = soup.find_all("form", {"class": "variations_form cart"})
        if variation_forms:
            variations = json.loads(variation_forms[0].attrs['data-product_variations'])
            # Rounded so float noise (e.g. 19.99 * 100) cannot produce a
            # value that is off by one after integer conversion.
            # NOTE(review): prices look like they are kept in minor units
            # (x100), matching the separator-stripping branch - confirm.
            price = round(variations[0]['display_price'] * 100)
        else:
            summary = soup.find_all("div", {"class": 'entry-summary'})[0]
            price = _parse_price(summary.contents[3].contents[0].contents[0])
        _upsert_tea(name, tea_type, price, pic)
    else:
        soup = _fetch_soup(
            SITE_URL + "/product-category/" + tea_type.category.shop_name + '/' + tea_type.shop_name + '/')
        ul = soup.find_all("ul", {"class": "products"})[0]
        for item in ul.contents:
            if not isinstance(item, Tag):
                continue
            contents = item.contents[1].contents
            href = str(contents[1].attrs['data-src'])
            pic = _get_pic(href)
            # The product name is the text directly inside the title node,
            # ignoring any nested tags.
            name = ''.join(str(i) for i in contents[2] if not isinstance(i, Tag))
            price = _parse_price(contents[6].contents[1].contents[0])
            print(name)
            _upsert_tea(name, tea_type, price, pic)