This web application regularly scrapes offers from a custom immobilienscout24.de search URL, extracts the most important information, sends an email in case of a new offer, and enables the user to delete offers, add notes, and set a status.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

392 lines
12 KiB

  1. <?php
  // Entry script: load the configuration, make a daily backup of the database
  // and dispatch to the handler selected by the "do" parameter.
  $config = parse_ini_file("config.php", true);
  // parse_ini_file() returns false when config.php is missing or malformed
  if ($config === false || ! isset($config['general']['db'], $config['general']['url'])) {
      echo "Configuration could not be loaded";
      exit(1);
  }
  $db = $config['general']['db'];
  $queryurl = $config['general']['url'];

  // make backup of database (at most one per day, and only once a database exists)
  $db_bak = $db . ".bak_" . date('Y-m-d');
  if (file_exists($db) && ! file_exists($db_bak)) {
      copy($db, $db_bak);
  }

  // Called via CLI or web?
  if (defined('STDIN')) {
      // a missing command line argument falls back to the default action
      $do = isset($argv[1]) ? $argv[1] : false;
  } else if ($_SERVER['REQUEST_METHOD'] === 'POST') {
      $do = isset($_POST['do']) ? $_POST['do'] : false;
  } else {
      $do = isset($_GET['do']) ? $_GET['do'] : false;
  }

  // define action via "do" parameter; no parameter at all shows the overview
  if ($do === "download") {
      immo_download($db);
  } else if ($do === "update") {
      immo_update($db);
  } else if ($do === "show" || $do === "" || $do === false) {
      immo_show($db);
  } else {
      echo "No valid do action defined";
  }
  30. /////////////////////
  31. /// do = DOWNLOAD ///
  32. /////////////////////
  /**
   * Scrape the configured search URL, store every offer that is not yet in
   * the database and send one notification mail per new offer.
   *
   * Progress is echoed as HTML lines. Uses the globals $queryurl and $config
   * and works on the global $data array filled by read_db(); the result is
   * persisted with write_db() at the end.
   *
   * @param string $db Path of the JSON database file.
   */
  function immo_download($db) {
      global $data;
      global $queryurl;
      global $config;
      read_db($db);
      if (! is_array($data)) {
          $data = array(); // empty or unreadable database file
      }
      // IDs already stored; extended below so an offer listed twice is only added once
      $knownIds = array_column($data, 'id');

      $site = xsite($queryurl); // load and transform URL (page 1) to queryable $site
      // get amount of available pages
      $searchPages = $site->query('//div[@id="pageSelection"]/select');
      if ($searchPages->length == 0) {
          $pages = 1;
      } else {
          $pages = $searchPages->item(0)->childNodes->length;
      }

      // SERVER_NAME does not exist when the script runs from the command line (e.g. cron)
      $overview = "";
      if (isset($_SERVER["SERVER_NAME"], $_SERVER["PHP_SELF"])) {
          $overview = "Übersicht: https://" . $_SERVER["SERVER_NAME"] . $_SERVER["PHP_SELF"];
      }

      // create a dynamic url in which the current page number can be set in
      $queryurldyn = preg_replace("/\/Suche\//", "/Suche/P-%page%/", $queryurl);
      // loop through available pages
      for ($page = 1; $page <= $pages; $page++) {
          $queryurlcur = str_replace("%page%", $page, $queryurldyn);
          echo "Current search page: " . $queryurlcur . "<br />\n";
          $listing = xsite($queryurlcur); // load and transform URL of current page
          // get all links to expose pages
          $links = $listing->query('//a[@class="result-list-entry__brand-title-container"]/@href');
          // loop through web search results
          foreach ($links as $result) {
              // extract the numeric ID at the end of the link
              if (! preg_match("/\d+$/", $result->textContent, $matches)) {
                  continue; // link without expose ID, nothing to download
              }
              $id = (int)$matches[0];
              if (in_array($id, $knownIds, true)) { // entry is old
                  echo $id . " already exists.<br />\n";
                  continue;
              }

              // entry is new: load and transform expose URL
              $expose = xsite("https://www.immobilienscout24.de/expose/" . $id);
              // description
              $desc = immo_node_text($expose, '//h1[@id="expose-title"]');
              $desc = mb_convert_encoding($desc, 'UTF-8', 'UTF-8'); // remove/replace invalid characters
              // character-wise cut so a multi-byte character is never split
              $descs = mb_substr($desc, 0, 15, 'UTF-8');
              // flat available from
              $bezug = immo_node_text($expose, '//dd[@class="is24qa-bezugsfrei-ab grid-item three-fifths"]');
              // warm rent
              $mietew = immo_node_text($expose, '//dd[@class="is24qa-gesamtmiete grid-item three-fifths font-bold"]');
              // cold rent
              $mietek = immo_node_text($expose, '//dd[@class="is24qa-kaltmiete grid-item three-fifths"]');
              // rooms
              $zimmer = immo_node_text($expose, '//dd[@class="is24qa-zimmer grid-item three-fifths"]');
              // size
              $qm = immo_node_text($expose, '//dd[@class="is24qa-wohnflaeche-ca grid-item three-fifths"]');
              // location
              $ort = immo_node_text($expose, '//div[@class="address-block"]');
              $ort = str_replace("(zur Karte) ", "", $ort);
              $ort = str_replace("Die vollständige Adresse der Immobilie erhalten Sie vom Anbieter.", "", $ort);

              // append new array entry
              $data[] = array("id" => $id,
                  "date" => date('d.m. H:i'),
                  "desc" => $desc,
                  "descs" => $descs,
                  "bezug" => $bezug,
                  "mietew" => $mietew,
                  "mietek" => $mietek,
                  "ort" => $ort,
                  "zimmer" => $zimmer,
                  "qm" => $qm,
                  "rating" => 0,
                  "status" => "NEU",
                  "note" => "");
              $knownIds[] = $id;

              // send mail for new item
              $mailcontent = "*" . $desc . "* \r\n\r\n" .
                  "Bezug: " . $bezug . "\r\n" .
                  "Miete: " . $mietew . "/" . $mietek . "\r\n" .
                  "Größe: " . $qm . "\r\n" .
                  "Zimmer: " . $zimmer . "\r\n" .
                  "Ort: " . $ort . "\r\n" .
                  "Link: https://www.immobilienscout24.de/expose/" . $id . "\r\n\r\n" .
                  $overview;
              mail($config['mail']['to'], "Neues Wohnungsinserat: " . $descs . "...", $mailcontent, "From: " . $config['mail']['from'] . "\r\nMIME-Version: 1.0\r\nContent-Type: text/plain; charset=UTF-8");
              echo $id . " is new and has been downloaded.<br />\n";
          } // END foreach search result
      } // END for loop through pages
      echo "<p><a href='.'>Back to overview</a></p>\n";
      write_db($db, $data);
  }

  /**
   * Return the trimmed text of the first node matching an XPath query.
   *
   * @param DOMXPath $xpath Document to search.
   * @param string   $query XPath expression.
   * @return string Trimmed node text, or "" when nothing matches.
   */
  function immo_node_text($xpath, $query) {
      $nodes = $xpath->query($query);
      if ($nodes === false || $nodes->length === 0) {
          return "";
      }
      return trim($nodes->item(0)->nodeValue);
  }
  131. /////////////////
  132. /// do = SHOW ///
  133. /////////////////
  /**
   * Render the overview page: one table row per stored offer with forms to
   * edit the note and the status.
   *
   * Offers with status "del" are hidden unless the request carries ?hide=no.
   * A pending message left in the session by save_return_page() is shown once.
   * All stored values are HTML-escaped on output because they originate from
   * scraped pages and from user input.
   *
   * @param string $db Path of the JSON database file.
   */
  function immo_show($db) {
      global $data;
      global $queryurl;

      // The session cookie is sent as a header, so the session has to be
      // started before the first byte of HTML is written.
      if (session_status() === PHP_SESSION_NONE) {
          session_start();
      }
      $message = "";
      if (! empty($_SESSION['message'])) {
          $message = $_SESSION['message'];
          $_SESSION['message'] = NULL;
      }

      read_db($db);
      if (! is_array($data)) {
          $data = array(); // empty or unreadable database file
      }
      $hide = isset($_GET['hide']) ? $_GET['hide'] : "yes";

      // Sort for date descending (then ID descending) when displaying
      if (count($data) > 0) {
          $dates = array();
          $ids = array();
          foreach ($data as $key => $row) {
              $dates[$key] = isset($row['date']) ? $row['date'] : "";
              $ids[$key] = isset($row['id']) ? $row['id'] : 0;
          }
          array_multisort($dates, SORT_DESC, $ids, SORT_DESC, $data);
      }
  ?>
  <!DOCTYPE html>
  <html lang="de-DE">
  <head>
  <title>Immobilienscout Search Helper</title>
  <meta charset="UTF-8" />
  <style type="text/css">
  table {
  border-collapse: collapse;
  width: 100%;
  }
  table, th, td {
  border: 1px solid orange;
  }
  th, td {
  padding: 3px 2px;
  }
  tr:nth-child(even) {
  background-color: #f2f2f2
  }
  tr:hover {background-color: #FFECBA}
  textarea, input, select {
  vertical-align: middle;
  height: 3em;
  }
  input[type="submit"] {
  border: 1px solid #888;
  height: 20px;
  padding: 0;
  width: 2em;
  }
  a.btn {
  background-color: #98b879;
  color: #fff;
  font-weight: 700;
  padding: 10px;
  text-decoration: none;
  text-transform: uppercase;
  }
  #message {
  background-color: #FFE990;
  margin-bottom: 8px;
  padding-left: 20px;
  }
  </style>
  </head>
  <body>
  <div id="message">
  <?php echo immo_h($message); ?>
  </div>
  <table>
  <tr>
  <th>Datum</th>
  <th>Beschreibung</th>
  <th>Status</th>
  <th>Bezug</th>
  <th>Miete warm (kalt)</th>
  <th>Zi</th>
  <th>Größe</th>
  <th>Ort</th>
  <th style="min-width:140px">Notiz</th>
  <th style="min-width:140px">Status ändern</th>
  </tr>
  <?php
      foreach ($data as $row) {
          $status = isset($row['status']) ? $row['status'] : "";
          // deleted offers stay hidden unless explicitly requested
          if ($status === "del" && $hide !== "no") {
              continue;
          }
          $id = isset($row['id']) ? $row['id'] : "";
          $ort = isset($row['ort']) ? $row['ort'] : "";
          $link = "https://www.immobilienscout24.de/expose/" . $id;
          // urlencode() for the query string, immo_h() for the HTML attribute
          $maplink = "https://www.openstreetmap.org/search?query=" . urlencode($ort);

          echo "<tr>";
          echo "<td>" . immo_h(isset($row['date']) ? $row['date'] : "") . "</td>";
          echo "<td><span title='" . immo_h(isset($row['desc']) ? $row['desc'] : "") . "'><em><a target='_blank' href='" . immo_h($link) . "'>" . immo_h(isset($row['descs']) ? $row['descs'] : "") . "...</a></em></span></td>";
          echo "<td>" . immo_h($status) . "</td>";
          echo "<td>" . immo_h(isset($row['bezug']) ? $row['bezug'] : "") . "</td>";
          echo "<td>" . immo_h(isset($row['mietew']) ? $row['mietew'] : "") . " (<em>" . immo_h(isset($row['mietek']) ? $row['mietek'] : "") . "</em>)" . "</td>";
          echo "<td>" . immo_h(isset($row['zimmer']) ? $row['zimmer'] : "") . "</td>";
          echo "<td>" . immo_h(isset($row['qm']) ? $row['qm'] : "") . "</td>";
          echo "<td><a target='_blank' href='" . immo_h($maplink) . "'>" . immo_h($ort) . "</a></td>";
          echo "<td>";
  ?>
  <form action="" method="POST">
  <input name="do" value="update" type="hidden" />
  <input name="id" value="<?php echo immo_h($id); ?>" type="hidden" />
  <textarea name="note" rows="1" cols="14"><?php echo immo_h(isset($row['note']) ? $row['note'] : ""); ?></textarea>
  <input type="submit" value="OK">
  </form>
  <?php
          echo "</td>";
          echo "<td>";
  ?>
  <form action="" method="POST">
  <input name="do" value="update" type="hidden" />
  <input name="id" value="<?php echo immo_h($id); ?>" type="hidden" />
  <select name="status">
  <option value="del">löschen</option>
  <option value="alt">alt</option>
  <option value="NEU">NEU</option>
  <option value="kontaktiert">kontaktiert</option>
  <option value="abgelehnt">abgelehnt</option>
  <option value="termin">termin</option>
  </select>
  <input type="submit" value="OK">
  </form>
  <?php
          echo "</td>";
          echo "</tr>\n";
      }
  ?>
  </table>
  <p>Letztes Update: <?php echo date('d.m.Y H:i:s', filemtime($db)); ?></p>
  <p>Gelöschte Inserate <a href="?hide=no">einblenden</a> / <a href=".">ausblenden</a></p>
  <p>Suchlink: <a target="_blank" href="<?php echo immo_h($queryurl); ?>">Klick</a></p>
  <p><a class="btn" href="?do=download">Update</a></p>
  </body>
  </html>
  <?php
  }

  /**
   * Escape a value for use in HTML text or in a quoted HTML attribute.
   *
   * @param mixed $value Scalar value to print.
   * @return string HTML-safe representation.
   */
  function immo_h($value) {
      return htmlspecialchars((string)$value, ENT_QUOTES, 'UTF-8');
  }
  280. ///////////////////
  281. /// do = UPDATE ///
  282. ///////////////////
  /**
   * Apply a status and/or note change posted from the overview page.
   *
   * Reads "id", "status" and "note" from $_POST. When the offer exists, the
   * submitted fields are stored and save_return_page() persists the database
   * and redirects back; otherwise a short error page is printed.
   *
   * @param string $db Path of the JSON database file.
   */
  function immo_update($db) {
      global $data;
      read_db($db);
      $id = isset($_POST['id']) ? $_POST['id'] : false;
      // null means "field not submitted"; non-string input (e.g. status[]=x) is ignored
      $status = (isset($_POST['status']) && is_string($_POST['status'])) ? $_POST['status'] : null;
      $note = (isset($_POST['note']) && is_string($_POST['note'])) ? $_POST['note'] : null;

      // locate the offer by its numeric ID
      $key = false;
      if (is_array($data) && is_string($id) && preg_match('/^\d+$/', $id)) {
          foreach ($data as $index => $row) {
              if (isset($row['id']) && (int)$row['id'] === (int)$id) {
                  $key = $index;
                  break;
              }
          }
      }

      if ($key === false) {
          // the ID is request input, so escape it before echoing it back
          $shown = is_scalar($id) ? htmlspecialchars((string)$id, ENT_QUOTES, 'UTF-8') : "";
          echo $shown . " does not exist.<br />";
          echo "<br />";
          echo "<a href='.'>Back to overview</a>\n";
          return;
      }

      session_start(); // start Sessions
      // Collect all changes first: save_return_page() exits, so calling it per
      // field would drop the second field when both are submitted.
      $messages = array();
      if ($status !== null) {
          $data[$key]['status'] = $status;
          $messages[] = "Changed status of " . $id . " to \"" . $status . "\".";
      }
      if ($note !== null) {
          $data[$key]['note'] = $note;
          $messages[] = "Changed note of " . $id . " to \"" . $note . "\".";
      }
      if (count($messages) > 0) {
          save_return_page(implode(" ", $messages));
      }
  }
  306. ////////////////////////
  307. /// HELPER FUNCTIONS ///
  308. ////////////////////////
  /**
   * Load the database file and decode it into the global array $data.
   *
   * The file is created when it does not exist. An empty or undecodable file
   * yields an empty array, so callers can always count() and iterate $data.
   *
   * @param string $db Path of the JSON database file.
   */
  function read_db($db) {
      global $data; // declare $data a global variable to access it outside this function
      if (! file_exists($db)) {
          touch($db);
      }
      // read exactly the path that write_db() writes (no include-path lookup)
      $file = file_get_contents($db);
      $decoded = ($file === false) ? null : json_decode($file, true);
      if (! is_array($decoded)) {
          if ($file !== false && trim($file) !== '') {
              error_log("read_db: " . $db . " does not contain a valid JSON array");
          }
          $decoded = array();
      }
      $data = $decoded;
  }
  /**
   * Encode the $data array as pretty-printed JSON and write it to the
   * database file under an exclusive lock.
   *
   * When encoding fails (json_encode() returns false, e.g. for invalid UTF-8)
   * the existing file is left untouched instead of being overwritten with an
   * empty string.
   *
   * @param string $db   Path of the JSON database file.
   * @param array  $data Offers to store.
   * @return bool True when the file was written, false otherwise.
   */
  function write_db($db, $data) {
      $encoded = json_encode($data, JSON_PRETTY_PRINT);
      if ($encoded === false) {
          error_log("write_db: JSON encoding failed (" . json_last_error_msg() . "), database not written");
          return false;
      }
      return file_put_contents($db, $encoded, LOCK_EX) !== false;
  }
  /**
   * Load a URL (or local file) and return it as an XPath-searchable document.
   *
   * HTML parse errors are collected internally by libxml instead of being
   * printed. When the download fails or is empty, the returned object wraps
   * an empty document, so every query simply yields no nodes.
   *
   * @param string $url Address to load.
   * @return DOMXPath XPath interface over the parsed page.
   */
  function xsite($url) {
      libxml_use_internal_errors(true); // suppress errors
      $dl = file_get_contents($url);
      $site = new DOMDocument();
      // loadHTML() rejects an empty source, so only parse real content
      if ($dl !== false && $dl !== '') {
          $site->loadHTML($dl);
      }
      libxml_clear_errors(); // drop the buffered parse errors, nobody reads them
      return new DOMXPath($site);
  }
  /**
   * Save the database, put a message into the session and redirect to the
   * referring page, which displays the message. Ends the script.
   *
   * Uses the globals $db and $data. The session must already be started by
   * the caller. Requests without a Referer header are sent to the overview.
   *
   * @param string $output Text shown after the "Update: " prefix.
   */
  function save_return_page($output) {
      global $db;
      global $data;
      write_db($db, $data);
      $_SESSION['message'] = "Update: " . $output;
      // the Referer header is optional; fall back to the overview page
      $target = ! empty($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '.';
      header('Location: ' . $target);
      exit;
  }
  343. ?>