My personal website built with Hugo https://mehl.mx
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

249 lines
7.8 KiB

<?php
/* load config. You normally don't want to edit something here */
require_once 'config.php';

$instance = $config['mastodon-instance'];
$uid = $config['user-id'];
$searchurl = $config['search-url'];
/*
 * The search term comes straight from the query string. A request such as
 * "?search[]=x" turns $_GET['search'] into an array, which strtolower()
 * cannot handle, so anything that is not a string counts as "no search".
 */
$search = (isset($_GET['search']) && is_string($_GET['search'])) ? strtolower($_GET['search']) : '';
$debug_on = $config['debug'];

/* cache files */
// maximum cache ages; read_db() compares "timestamp + age" against time()
$ctt = $config['cache_toots'];
$dbt = "cache-toots.json";
$ctc = $config['cache_comments'];
// "%id" is replaced with the toot ID by the cache functions
$dbc = "cache-comments_%id.json";

/* Exit if search empty */
if (empty($search)) {
    debug("No proper search given");
    die();
}
/* MISC FUNCTIONS */
/**
 * Write a debug message to the PHP error log.
 *
 * Nothing is logged unless the global $debug_on is exactly true.
 *
 * @param mixed $data Value to log; it is rendered with print_r().
 */
function debug($data) {
    global $debug_on;

    // logging is opt-in: bail out early unless debugging is switched on
    if ($debug_on !== true) {
        return;
    }

    $rendered = print_r($data, TRUE);
    error_log("[getcomments.php] " . $rendered);
}
/* CACHE FUNCTIONS */
/* write data to file */
/**
 * Store data in a JSON cache file together with the current timestamp.
 *
 * The file contains an object with the keys "toots" (the payload) and
 * "timestamp" (time() at the moment of writing).
 *
 * @param string $db   Cache file name; may contain the placeholder "%id".
 * @param mixed  $data Payload stored under the "toots" key.
 * @param mixed  $id   If truthy, replaces "%id" in $db (comments cache).
 */
function write_db($db, $data, $id) {
    // a truthy $id means a per-toot comments file: fill in the placeholder
    $path = $id ? str_replace('%id', $id, $db) : $db;

    $payload = [
        'toots' => $data,
        'timestamp' => time(),
    ];

    // encode and write in one go, holding an exclusive lock while writing
    file_put_contents($path, json_encode($payload, JSON_PRETTY_PRINT), LOCK_EX);
}
/* delete file */
/**
 * Remove a cache file.
 *
 * @param string $db Cache file name; may contain the placeholder "%id".
 * @param mixed  $id If truthy, replaces "%id" in $db (comments cache).
 */
function delete_db($db, $id) {
    // a truthy $id means a per-toot comments file: fill in the placeholder
    $target = $id ? str_replace('%id', $id, $db) : $db;

    unlink($target);
}
/* access data from file */
/**
 * Load the payload of a JSON cache file and report whether it must be refreshed.
 *
 * If the file does not exist yet it is created via write_db(), using $data
 * (or an empty array when $data is empty) as its initial payload.
 *
 * $cachebreak is only ever set to true here, never reset to false, so callers
 * initialise it to false before the call.
 *
 * @param string $db         Cache file name; may contain the placeholder "%id".
 * @param mixed  $data       In: initial payload for a new file. Out: cached payload.
 * @param int    $cachetime  Maximum age; compared as "timestamp + $cachetime < time()".
 * @param bool   $cachebreak Set to true when the cache is new, too old or unreadable.
 * @param mixed  $id         If truthy, replaces "%id" in $db (comments cache).
 */
function read_db($db, &$data, $cachetime, &$cachebreak, $id) {
    // if $id is given, it's a comments file. Replace placeholder in filename
    if ($id) {
        $db = str_replace('%id', $id, $db);
    }

    // if DB does not exist, create it
    if (! file_exists($db)) {
        // if $data empty (usually with $toots, not with comment's $result), populate with empty array
        if (empty($data)) {
            $data = array();
        }
        touch($db);
        write_db($db, $data, $id);
        $cachebreak = true;
    }

    // Read exactly the file checked above; searching the include path could
    // pick up a different file with the same name.
    $raw = file_get_contents($db);
    $decoded = ($raw === false) ? null : json_decode($raw, true);

    // An empty, truncated or otherwise broken cache file must not leak null
    // into the caller: hand back an empty payload and force a refresh.
    if (!is_array($decoded) || !isset($decoded['toots']) || !is_array($decoded['toots'])) {
        $data = array();
        $cachebreak = true;
        return;
    }

    // check if timestamp in cache file too old
    if (empty($decoded['timestamp']) || ($decoded['timestamp'] + $cachetime < time())) {
        $cachebreak = true;
    }

    $data = $decoded['toots'];
}
/* TOOT FUNCTIONS */
/**
 * Fetch up to 50 statuses of the account after $min_id and reduce each one
 * to its ID, creation date and the blog URL found in it.
 *
 * Reblogs and replies are excluded by the request. The URL of each status is
 * determined by analyzeToot(), which requests that status again.
 *
 * @param string $instance  Base URL of the Mastodon instance.
 * @param mixed  $uid       Account ID whose statuses are listed.
 * @param mixed  $min_id    Passed as the "min_id" query parameter.
 * @param string $searchurl URL pattern handed on to analyzeToot().
 * @return array List of arrays with the keys "id", "date" and "url"; empty
 *               when the request fails or does not yield a list of statuses.
 */
function collectToots($instance, $uid, $min_id, $searchurl) {
    $raw = file_get_contents("$instance/api/v1/accounts/$uid/statuses?exclude_reblogs=true&exclude_replies=true&limit=50&min_id=$min_id");
    // file_get_contents() has already emitted a warning if the request failed
    if ($raw === false) {
        return array();
    }

    $json_complete = json_decode($raw, true);
    if (!is_array($json_complete)) {
        return array();
    }

    $json = array();
    foreach ($json_complete as $toot) {
        // an error payload such as {"error": "..."} decodes to an array of
        // strings; skip everything that does not look like a status
        if (!is_array($toot) || !isset($toot['id'], $toot['created_at'])) {
            continue;
        }
        $json[] = array(
            'id' => $toot['id'],
            'date' => $toot['created_at'],
            'url' => analyzeToot($instance, $toot['id'], $searchurl),
        );
    }

    return $json;
}
/* Find out if a toot contains the searched URL */
/**
 * Look for the searched URL inside the content of a single status.
 *
 * @param string $instance  Base URL of the Mastodon instance.
 * @param mixed  $id        ID of the status to inspect.
 * @param string $searchurl Inserted unescaped into a regular expression that
 *                          uses "|" as delimiter.
 *                          NOTE(review): it must therefore not contain "|";
 *                          whether regex syntax in the config value is
 *                          intended cannot be told from this file.
 * @return string Lower-cased first match, running up to (excluding) the next
 *                double quote; "" if nothing matches or the status cannot be
 *                loaded.
 */
function analyzeToot($instance, $id, $searchurl) {
    debug("Searching for $searchurl in $id");

    $raw = file_get_contents("$instance/api/v1/statuses/$id");
    // file_get_contents() has already emitted a warning if the request failed
    if ($raw === false) {
        return("");
    }

    $json = json_decode($raw, true);
    // error payloads and broken responses carry no usable "content"
    if (!is_array($json) || !isset($json['content']) || !is_string($json['content'])) {
        return("");
    }

    // search for $searchurl inside of <a> tags, until (and excluding) a "
    preg_match("|$searchurl.+?(?=\")|i", $json['content'], $matches);
    if (!empty($matches)) {
        return(strtolower($matches[0])); // take first match inside toot
    }

    return("");
}
/* from the context of a toot, extract the interesting bits */
/**
 * Copy the relevant fields of every reply into $result['comments'].
 *
 * Entries are keyed by the reply's ID. The author name falls back to the
 * account's username when the display name is empty.
 *
 * @param array $descendants Replies as found under "descendants" in a context response.
 * @param mixed $root        ID of the toot the replies belong to; stored as "root".
 * @param array $result      Result structure; its "comments" entry is extended in place.
 * @return array The updated $result.
 */
function filterComments($descendants, $root, &$result) {
    foreach ($descendants as $reply) {
        $account = $reply['account'];

        // prefer the display name, fall back to the username if it is empty
        $shownName = $account['display_name'];
        if (!$shownName) {
            $shownName = $account['username'];
        }

        $author = [
            'display_name' => $shownName,
            'avatar' => $account['avatar_static'],
            'url' => $account['url'],
        ];

        $result['comments'][$reply['id']] = [
            'author' => $author,
            'toot' => $reply['content'],
            'date' => $reply['created_at'],
            'url' => $reply['uri'],
            'reply_to' => $reply['in_reply_to_id'],
            'root' => $root,
        ];
    }

    return $result;
}
/* get /context of toot */
/**
 * Load the context of a toot and add all of its replies to $result.
 *
 * $result is left untouched when the request fails or the response has no
 * list of descendants (e.g. an error payload).
 *
 * @param string $instance Base URL of the Mastodon instance.
 * @param mixed  $id       ID of the toot whose replies are collected.
 * @param array  $result   Result structure, extended in place by filterComments().
 */
function tootContext($instance, $id, &$result) {
    $raw = file_get_contents("$instance/api/v1/statuses/$id/context");
    // file_get_contents() has already emitted a warning if the request failed
    if ($raw === false) {
        return;
    }

    $json = json_decode($raw, true);
    if (!is_array($json) || !isset($json['descendants']) || !is_array($json['descendants'])) {
        return;
    }

    filterComments($json['descendants'], $id, $result);
}
/* extract stats info from toot */
/**
 * Reduce a status to the counters and URL used for the statistics.
 *
 * @param array $stats Decoded status with the keys "reblogs_count",
 *                     "favourites_count", "replies_count" and "url".
 * @return array Keys "reblogs", "favs", "replies" (integers) and "url".
 */
function filterStats($stats) {
    return array(
        'reblogs' => intval($stats['reblogs_count']),
        'favs' => intval($stats['favourites_count']),
        'replies' => intval($stats['replies_count']),
        'url' => $stats['url'],
    );
}
/* for toot, extract interesting statistics */
/**
 * Load a toot and add its counters to $result['stats'].
 *
 * Reblogs, favourites and replies are added to the existing values; the URL
 * is only set if $result['stats']['url'] is still empty. $result is left
 * untouched when the status cannot be loaded or carries no counters
 * (e.g. an error payload).
 *
 * @param string $instance Base URL of the Mastodon instance.
 * @param mixed  $id       ID of the toot.
 * @param array  $result   Result structure whose "stats" entry is updated in place.
 */
function tootStats($instance, $id, &$result) {
    debug("Checking ID $id");

    $raw = file_get_contents("$instance/api/v1/statuses/$id");
    $json = ($raw === false) ? null : json_decode($raw, true);

    // without the counters there is nothing to add; in particular do not
    // overwrite the (still empty) URL with null
    if (!is_array($json) || !isset($json['reblogs_count'], $json['favourites_count'], $json['replies_count'])) {
        debug("No stats available for ID $id");
        return;
    }

    $newStats = filterStats($json);
    $result['stats']['reblogs'] += $newStats['reblogs'];
    $result['stats']['favs'] += $newStats['favs'];
    $result['stats']['replies'] += $newStats['replies'];

    // keep the URL that was stored first
    if (empty($result['stats']['url'])) {
        $result['stats']['url'] = $newStats['url'];
    }
}
/***************
* START PROGRAM
***************/
/*
 * Step 1: load the cached list of toots and refresh it if necessary.
 * read_db() fills $toots from the cache file either way and sets $cachebreak
 * when the cache file is new or older than the max. cache time ($ctt).
 */
$cachebreak = false;
read_db($dbt, $toots, $ctt, $cachebreak, false);
if ($cachebreak) {
/* Collect all the toots */
/* get id of latest cached toot, and set as $min_id */
// index 0 is treated as the latest toot: new results are merged in front of the cached ones below
debug("Toots cache outdated. Checking for new toots");
if (!empty($toots['0']['id'])) {
$min_id_cached = $toots['0']['id'];
$min_id = $min_id_cached;
} else {
/* if cached toots do not exist, start from oldest toot */
$min_id = "0";
$min_id_cached = "0";
}
/* test whether there are new toots available */
// Search for toots newer than the cached latest toot ID ($min_id).
// Repeat until a round leaves the first entry of $toots unchanged.
$uptodate = false;
while ($uptodate === false) {
// newly collected toots go in front of the ones already known
$toots = array_merge(collectToots($instance, $uid, $min_id, $searchurl), $toots);
$min_id_new = $toots['0']['id']; // the latest ID of the recent search
if ($min_id_new === $min_id) {
// min_id is the latest, let's write the new DB and end this loop
$uptodate = true;
debug("Toots up-to-date. Rewrite cache DB.");
write_db($dbt, $toots, false);
} else {
// next round looks for toots newer than the newly found ID
debug("Newer toots than in cache found. Starting another search for new toots");
$min_id = $min_id_new;
}
}
} else {
debug("Toots cache is up-to-date");
}
/*
 * Step 2: find the toot that links to the searched blog URL and collect its
 * comments and stats.
 */
// create empty $result; this is also what gets returned if no toot matches
$result_empty = ['comments' => [], 'stats' => ['reblogs' => 0, 'favs' => 0, 'replies' => 0, 'url' => '', 'root' => 0]];
$result = $result_empty;
/* check if URL from $search exists in $toots */
// yields the indexes of all toots whose stored URL contains $search as a substring
$id = array_keys(
array_filter(
array_column($toots, 'url'),
function ($value) use ($search) {
return (strpos($value, $search) !== false);
}
)
);
if (empty($id)) {
debug("Blog URL \"$search\" has not been found");
} else {
// if multiple toots with the searched URL exist, take the oldest one (largest array index)
// from here on $id holds the toot ID, no longer the list of indexes
$id = $toots[end($id)]['id'];
/* read cached comments, or reload new comments if cached data too old */
$cachebreak = false;
read_db($dbc, $result, $ctc, $cachebreak, $id);
if ($cachebreak) {
debug("Comments cache for $id outdated. Checking for new comments");
// delete old cache file, otherwise the stats would add up
delete_db($dbc, $id);
// re-create empty $result and new cache file
$result = $result_empty;
read_db($dbc, $result, $ctc, $cachebreak, $id);
/* Extract comments and stats from toot */
tootContext($instance, $id, $result);
tootStats($instance, $id, $result);
// FIXME: At the moment the API doesn't return the correct replies count so I count it manually
$result['stats']['replies'] = count($result['comments']);
$result['stats']['root'] = $id;
// store the freshly collected comments and stats for the next request
write_db($dbc, $result, $id);
} else {
debug("Comments cache for $id up-to-date. Returning cached comments");
}
}
/* Step 3: send the result to the client */
// headers for not caching the results
header('Cache-Control: no-cache, must-revalidate');
header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
// headers to tell that result is JSON
header('Content-type: application/json');
// actually output result as JSON, to be digested by getcomments.js
echo json_encode($result);
?>