I have a problem querying the website with the script below: I get error 422 and have no idea what is causing it.
import csv
import json
import os
import time

import pandas as pd
import requests
# URL of the GraphQL endpoint that receives the queries.
url = "https://www.otodom.pl/api/query"

# Request headers imitating a desktop Chrome browser.
#
# Header values have to be plain single-line strings that can be encoded as
# Latin-1; anything else is rejected by the HTTP client before the request
# is even sent.
headers = {
    "Accept": "multipart/mixed, application/graphql-response+json, application/graphql+json, application/json",
    # "Accept-Encoding" is intentionally not set here. Advertising "br" and
    # "zstd" invites responses that requests can only decode when optional
    # packages are installed; leaving the header out lets requests advertise
    # exactly the encodings it is able to decode.
    "Accept-Language": "pl-PL,pl;q=0.9,en-US;q=0.8,en;q=0.7",
    "Content-Type": "application/json",
    "Origin": "https://www.otodom.pl",
    # The previous value was the title of a listing page rather than a URL
    # and contained characters outside Latin-1, which cannot be sent in a
    # header. NOTE(review): the site root is used as a neutral stand-in;
    # replace it with the real page URL if the server checks the referrer.
    "Referer": "https://www.otodom.pl/",
    "Sec-Ch-Ua": '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
}

# A browser session cookie (it includes a session id) does not belong in the
# source: it leaks the session and silently goes stale. When a cookie is
# needed, export it as OTODOM_COOKIE (a single line, exactly as the browser
# sends it) and it is attached here.
_cookie = os.environ.get("OTODOM_COOKIE", "").strip()
if _cookie:
    headers["Cookie"] = _cookie
# Function that sends the listing queries.

# GraphQL document posted by query_otodom().
#
# NOTE(review): this document has not been verified against the server's
# schema. An HTTP 422 from a GraphQL endpoint usually means the request body
# was understood as JSON but rejected as a query (unknown operation, field or
# argument, or possibly a missing extension the server requires). The
# response body printed by query_otodom() should name the offending part;
# compare it with the request the website itself sends (browser dev tools,
# Network tab).
_GET_LISTINGS_QUERY = """
query GetListings($priceMin: Float, $priceMax: Float, $category: String, $market: String, $limit: Int, $page: Int) {
  listings(
    filters: {
      priceMin: $priceMin,
      priceMax: $priceMax,
      category: $category,
      market: $market
    },
    pagination: {
      limit: $limit,
      page: $page
    }
  ) {
    items {
      id
      title
      url
      price {
        value
        currency
      }
      area {
        value
      }
      rooms {
        value
      }
      location {
        latitude
        longitude
      }
      images {
        url
      }
    }
  }
}
"""


def query_otodom(price_min, price_max, page=1, limit=100, timeout=30):
    """Post the ``GetListings`` query for one price range.

    Args:
        price_min: Value sent as the ``priceMin`` query variable.
        price_max: Value sent as the ``priceMax`` query variable.
        page: Value sent as the ``page`` query variable (defaults to 1).
        limit: Value sent as the ``limit`` query variable (defaults to 100).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded JSON response, or ``None`` when the request failed, the
        server answered with an error status, or the body was not JSON.
        A response that carries GraphQL ``errors`` is still returned; the
        errors are printed so they are not overlooked.
    """
    payload = {
        "operationName": "GetListings",
        "variables": {
            "priceMin": price_min,
            "priceMax": price_max,
            "category": "apartment",
            "market": "primary",
            "limit": limit,
            "page": page,
        },
        "query": _GET_LISTINGS_QUERY,
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions that
        # do not wrap them in a RequestException subclass.
        print(f"Błąd podczas wykonywania zapytania: {e}")
        # Compare with None explicitly: a Response with an error status is
        # falsy, and that is exactly the case whose body we want to see.
        failed = getattr(e, "response", None)
        if failed is not None and failed.text:
            # The body is where the server explains *why* it rejected the
            # request (e.g. the validation messages behind a 422).
            print(f"Treść odpowiedzi serwera: {failed.text[:2000]}")
        return None

    if isinstance(result, dict) and result.get("errors"):
        graphql_errors = result["errors"]
        print(f"Błędy GraphQL w odpowiedzi: {graphql_errors}")
    return result
# Functions for saving the data to a CSV file.
def _nested_value(listing, key, field):
    """Return ``listing[key][field]``, or ``""`` if ``key`` is missing or null."""
    return (listing.get(key) or {}).get(field, "")


def save_to_csv(data, file_path):
    """Write the listings of one GraphQL response to ``file_path`` as CSV.

    The listings are read from ``data["data"]["listings"]["items"]``. Every
    level of that path, as well as each listing's ``price``, ``area``,
    ``rooms``, ``location`` and ``images``, may be missing or ``null``;
    such values end up as empty cells (``[]`` for images) instead of
    raising.

    The file is written with columns ``id, title, url, price, area, rooms,
    latitude, longitude, images``, where ``images`` is a JSON-encoded list
    of image URLs. The file is replaced on every call, so pass everything
    that should end up in it in a single call.

    Args:
        data: Decoded GraphQL response. Nothing is written when it is
            empty or ``None``.
        file_path: Destination path of the CSV file.
    """
    if not data:
        return

    # ``dict.get(key, {})`` only covers *missing* keys; JSON ``null`` arrives
    # as None, hence the ``or`` fallbacks at every level.
    listings = ((data.get("data") or {}).get("listings") or {}).get("items") or []

    rows = [
        [
            listing.get("id", ""),
            listing.get("title", ""),
            listing.get("url", ""),
            _nested_value(listing, "price", "value"),
            _nested_value(listing, "area", "value"),
            _nested_value(listing, "rooms", "value"),
            _nested_value(listing, "location", "latitude"),
            _nested_value(listing, "location", "longitude"),
            json.dumps([img["url"] for img in listing.get("images") or []]),
        ]
        for listing in listings
    ]

    df = pd.DataFrame(rows, columns=[
        "id", "title", "url", "price", "area", "rooms", "latitude", "longitude", "images"
    ])
    df.to_csv(file_path, index=False, encoding='utf-8')
# Main loop: run the query for consecutive price ranges and collect the
# listings, then write them to the CSV file in one go.
#
# The listings are accumulated because save_to_csv() replaces the file on
# every call; saving inside the loop would keep only the last price range.
all_data = []
seen_ids = set()
output_file = 'otodom_data.csv'

for price_min in range(300000, 550001, 1000):
    # Adjacent ranges share their boundary price (this price_max equals the
    # next price_min), so a listing priced exactly on a boundary may be
    # returned twice -- presumably the filters are inclusive; the id check
    # below keeps the output free of duplicates either way.
    price_max = price_min + 1000
    response_json = query_otodom(price_min, price_max)
    if response_json is not None:
        # Any level of the response may be null when the server reports
        # GraphQL errors, hence the ``or`` fallbacks.
        items = ((response_json.get("data") or {}).get("listings") or {}).get("items") or []
        for item in items:
            listing_id = item.get("id")
            if listing_id is not None:
                if listing_id in seen_ids:
                    continue
                seen_ids.add(listing_id)
            all_data.append(item)
    time.sleep(1)  # Wait 1 second between requests.

if all_data:
    # Wrap the collected listings in the response shape save_to_csv() reads.
    save_to_csv({"data": {"listings": {"items": all_data}}}, output_file)
    print("Wszystkie odpowiedzi zostały zapisane do pliku CSV.")
else:
    print("Nie pobrano żadnych ogłoszeń; plik CSV nie został zapisany.")