You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
tea/sync.py

50 lines
1.8 KiB

import django
from django.conf import settings
import requests
from bs4 import BeautifulSoup
from pathlib import Path
# Directory containing this script; the SQLite database file sits next to it.
BASE_DIR = Path(__file__).resolve().parent

# Single SQLite database used as Django's 'default' connection.
_SQLITE_DATABASE = {
    "ENGINE": "django.db.backends.sqlite3",
    "NAME": BASE_DIR / "db.sqlite3",
}

# Django's stock contrib apps plus the project's own 'choose' app.
_INSTALLED_APPS = [
    "django.contrib.admin",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
    "choose",
]

# Configure Django in-process (no settings module), then initialise it so the
# model import that follows this block can succeed.
settings.configure(
    DATABASES={"default": _SQLITE_DATABASE},
    INSTALLED_APPS=_INSTALLED_APPS,
)
django.setup()
from choose.models import TeaType, Tea, TeaCategory
# Sync tea types from the shop's catalogue menu into the local database.
#
# For every top-level menu entry whose link matches the catalogue URL of a
# TeaCategory stored in the database, each entry of its sub-menu is saved as a
# TeaType (created only when an identical row does not exist yet) and echoed
# to stdout as "category:type:url".
SHOP_URL = 'https://chainiisvet.ru'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever


def _child_tags(node):
    """Return the direct child tags of *node*, skipping text nodes."""
    return node.find_all(True, recursive=False)


def _slug_from_url(url):
    """Return the last path segment of *url*, with or without a trailing '/'."""
    return url.rstrip('/').rsplit('/', 1)[-1]


db_cats = list(TeaCategory.objects.all())
# Map each category's catalogue URL to its database row, so a matching menu
# link yields the category directly instead of a second lookup by the label
# shown on the site (which fails as soon as that label differs from the DB).
categories = {
    SHOP_URL + '/product-category/' + cat.shop_name + '/': cat
    for cat in db_cats
}

r = requests.get(SHOP_URL + '/', timeout=REQUEST_TIMEOUT)
r.raise_for_status()  # fail loudly on an error page instead of parsing it
# Name the parser explicitly so the parse tree does not depend on which
# optional parser libraries happen to be installed.
soup = BeautifulSoup(r.content, 'html.parser')
ul = soup.find('ul', {'id': 'menu-katalog'})
if ul is None:
    raise RuntimeError(
        "catalogue menu <ul id='menu-katalog'> not found on " + SHOP_URL)

for item in _child_tags(ul):
    category_link = item.find('a', recursive=False)
    if category_link is None:
        continue
    category = categories.get(category_link.get('href'))
    if category is None:
        # Menu entry does not correspond to any category in the database.
        continue
    submenu = item.find('ul', recursive=False)
    if submenu is None:
        # Category without a sub-menu: nothing to sync.
        continue
    for entry in _child_tags(submenu):
        link = entry.find('a', recursive=False)
        if link is None or not link.contents or not link.get('href'):
            continue
        # First child of the link is its visible label; kept unstripped so
        # rows created by earlier runs still match in get_or_create().
        type_name = str(link.contents[0])
        href = link['href']
        if not href.startswith('https'):
            href = SHOP_URL + href
        TeaType.objects.get_or_create(
            shop_name=_slug_from_url(href),
            name=type_name,
            category=category,
        )
        print(category, type_name, href, sep=':')