Towarzystwo Miłośników Ziemi Koronowskiej

# Builds a JSON search index (headings, paragraphs, image descriptions)
# from the HTML files found under FOLDER_PROJEKTU.
import os
import json

from bs4 import BeautifulSoup

# Configuration
FOLDER_PROJEKTU = './'  # Path to the directory holding the HTML files
PLIK_WYNIKOWY = 'search-index.json'

# Paragraphs whose stripped text is this long or shorter are not indexed.
_MIN_DLUGOSC_AKAPITU = 20
# Paragraph descriptions are cut to at most this many characters.
_MAKS_DLUGOSC_OPISU = 180


def _skroc(tekst, limit=_MAKS_DLUGOSC_OPISU):
    """Return *tekst* cut to *limit* characters.

    The "..." marker is appended only when something was actually cut, so
    short paragraphs are not shown as if they had been truncated.
    """
    if len(tekst) <= limit:
        return tekst
    return tekst[:limit] + "..."


def _wpisy_ze_strony(soup, relatywna_sciezka):
    """Collect the index entries for one parsed page.

    Args:
        soup: BeautifulSoup tree of the page; it is modified in place
            (the search widget subtree is removed).
        relatywna_sciezka: Page path relative to FOLDER_PROJEKTU, using
            forward slashes; stored as the "url" of every entry.

    Returns:
        A list of dicts with the keys "url", "title", "desc" and "type"
        (plus "src" for images).
    """
    # Remove the search widget itself so it is not indexed.
    for widzet in soup.select('#advanced-search-widget-root'):
        widzet.decompose()

    wpisy = []

    # 1. Headings (section titles)
    for naglowek in soup.find_all(['h1', 'h2', 'h3']):
        tekst = naglowek.get_text().strip()
        if tekst:
            wpisy.append({
                "url": relatywna_sciezka,
                "title": tekst,
                "desc": "Nagłówek na stronie: " + relatywna_sciezka,
                "type": "names",
            })

    # 2. Paragraphs (body text)
    for akapit in soup.find_all('p'):
        tekst = akapit.get_text().strip()
        if len(tekst) > _MIN_DLUGOSC_AKAPITU:
            wpisy.append({
                "url": relatywna_sciezka,
                "title": "Fragment treści",
                "desc": _skroc(tekst),
                "type": "text",
            })

    # 3. Images (photo descriptions); "title" is the fallback for an
    # empty or missing "alt".
    for img in soup.find_all('img'):
        alt = img.get('alt', '') or img.get('title', '')
        if len(alt) > 2:
            wpisy.append({
                "url": relatywna_sciezka,
                "title": "Opis fotografii",
                "desc": alt,
                "type": "media",
                "src": img.get('src', ''),
            })

    return wpisy


def generuj_indeks():
    """Index every ``.html`` file under FOLDER_PROJEKTU into PLIK_WYNIKOWY.

    Walks the project directory recursively, parses each HTML file and
    writes all collected entries as a UTF-8 JSON array. Progress messages
    are printed to standard output. Returns nothing.
    """
    indeks = []
    print("Rozpoczynam indeksowanie plików...")

    for root, _dirs, files in os.walk(FOLDER_PROJEKTU):
        for file in files:
            # The output file is JSON, so the ".html" filter already
            # excludes it; no separate name check is needed.
            if not file.endswith(".html"):
                continue

            sciezka = os.path.join(root, file)
            # Normalise Windows separators so the URLs work in a browser.
            relatywna_sciezka = os.path.relpath(
                sciezka, FOLDER_PROJEKTU
            ).replace('\\', '/')

            with open(sciezka, 'r', encoding='utf-8') as f:
                soup = BeautifulSoup(f, 'html.parser')

            indeks.extend(_wpisy_ze_strony(soup, relatywna_sciezka))

    with open(PLIK_WYNIKOWY, 'w', encoding='utf-8') as f:
        json.dump(indeks, f, ensure_ascii=False, indent=2)

    print(f"Gotowe! Plik {PLIK_WYNIKOWY} został utworzony.")


if __name__ == "__main__":
    generuj_indeks()

86-010 Koronowo, ul. Szosa Kotomierska 3

tmzk76@o2.pl

+48 734 845 683

Website created with WebWave, a white-label responsive website builder.