313 lines
9.9 KiB
PHP
313 lines
9.9 KiB
PHP
<?php
|
|
/*
|
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
SPDX-FileCopyrightText: 2019 Max Mehl <mail@mehl.mx>
|
|
*/
|
|
|
|
/* Load config. You normally don't want to edit anything here. */
require_once 'config.php';

// Settings taken from config.php
$instance = $config['mastodon-instance'];
$uid = $config['user-id'];
$searchurl = $config['search-url'];
$debug_on = $config['debug'];

// Request parameters.
// PHP turns "?search[]=x" into an array; strtolower() on an array throws a
// TypeError on PHP 8, so anything that is not a string counts as "not provided".
$search = (isset($_GET['search']) && is_string($_GET['search'])) ? strtolower($_GET['search']) : '';
$toot_url = (isset($_GET['toot_url']) && is_string($_GET['toot_url'])) ? $_GET['toot_url'] : null;
$force_refresh = isset($_GET['force_refresh']) && $_GET['force_refresh'] == '1';

debug("Request parameters - search: $search, toot_url: " . ($toot_url ? $toot_url : "not provided") . ", force_refresh: " . ($force_refresh ? "yes" : "no"));

/* Cache files */
// Maximum cache age for the toot list, and the file holding it
$ctt = $config['cache_toots'];
$dbt = "cache-toots.json";
// Maximum cache age for comments, and the per-toot file name template
// ("%id" is replaced with the toot ID by the cache functions)
$ctc = $config['cache_comments'];
$dbc = "cache-comments_%id.json";
|
|
|
/* Force cache refresh if requested: remove the toot cache and every comment cache */
if ($force_refresh) {
    debug("Force refresh requested - clearing toot cache");
    if (file_exists($dbt)) {
        unlink($dbt);
    }

    // Also clear all comment cache files.
    // glob() returns false on error (and, on some systems, when nothing
    // matches), which foreach cannot iterate - normalise to an empty list.
    $comment_caches = glob("cache-comments_*.json");
    if ($comment_caches === false) {
        $comment_caches = [];
    }
    foreach ($comment_caches as $cache_file) {
        // only report a deletion that actually happened
        if (is_file($cache_file) && unlink($cache_file)) {
            debug("Deleted cache file: $cache_file");
        }
    }
}
|
|
|
|
/* Without a search term there is nothing to look up: stop without output */
if (!$search) {
    debug("No proper search given");
    exit;
}
|
|
|
|
/* MISC FUNCTIONS */
|
|
/**
 * Send a diagnostic message to the PHP error log.
 *
 * Nothing is logged unless the global $debug_on is strictly true.
 *
 * @param mixed $data Value to log; rendered with print_r()
 */
function debug($data) {
    global $debug_on;

    if ($debug_on !== true) {
        return;
    }

    $message = print_r($data, true);
    error_log("[getcomments.php] " . $message);
}
|
|
|
|
/**
 * Parse a Mastodon toot URL into its instance base URL and toot ID.
 *
 * Accepted shapes:
 *   https://instance.example/@username/1234567890
 *   https://instance.example/users/username/statuses/1234567890
 * Surrounding whitespace, a trailing slash, a query string and a fragment are
 * tolerated. Only http/https URLs are accepted. Credentials embedded in the
 * URL (user:pass@host) are not carried over into the returned instance.
 *
 * @param mixed $url Candidate URL (anything that is not a string is rejected)
 * @return array|null ['instance' => 'scheme://host[:port]', 'id' => '<digits>'],
 *                    or null if the URL does not look like a toot URL
 */
function parseTootUrl($url) {
    if (!is_string($url)) {
        return null;
    }

    $parts = parse_url(trim($url));
    if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host']) || empty($parts['path'])) {
        return null;
    }

    if (!preg_match('/^https?$/i', $parts['scheme'])) {
        return null;
    }

    // the ID is the last, purely numeric path segment; at least one segment
    // (e.g. "@username") has to precede it
    if (!preg_match('|^/.*/(\d+)/?$|D', $parts['path'], $matches)) {
        return null;
    }

    $instance = $parts['scheme'] . '://' . $parts['host'];
    if (isset($parts['port'])) {
        $instance .= ':' . $parts['port'];
    }

    return [
        'instance' => $instance,
        'id' => $matches[1]
    ];
}
|
|
|
|
/* CACHE FUNCTIONS */
|
|
/**
 * Write data, together with the current timestamp, to a JSON cache file.
 *
 * The file content is {"toots": <data>, "timestamp": <unix time>}.
 * If the data cannot be encoded (e.g. malformed UTF-8), nothing is written,
 * so an existing cache file is not replaced by an empty one.
 *
 * @param string $db   Cache file name; may contain the placeholder "%id"
 * @param mixed  $data Payload stored under the "toots" key
 * @param mixed  $id   If truthy, replaces "%id" in $db (comments cache file)
 * @return bool true if the file was written, false otherwise
 */
function write_db($db, $data, $id) {
    // if $id is given, it's a comments file. Replace placeholder in filename
    if ($id) {
        $db = str_replace('%id', $id, $db);
    }

    $file = [
        'toots' => $data,
        'timestamp' => time(),
    ];

    // encode and write file (no pretty print for performance)
    $encoded = json_encode($file);
    if ($encoded === false) {
        return false;
    }

    return file_put_contents($db, $encoded, LOCK_EX) !== false;
}
|
|
/**
 * Delete a cache file; a file that does not exist is silently ignored.
 *
 * @param string $db Cache file name; may contain the placeholder "%id"
 * @param mixed  $id If truthy, replaces "%id" in $db (comments cache file)
 */
function delete_db($db, $id) {
    // if $id is given, it's a comments file. Replace placeholder in filename
    if ($id) {
        $db = str_replace('%id', $id, $db);
    }

    // unlink() raises a warning for a missing file, so check first
    if (is_file($db)) {
        unlink($db);
    }
}
|
|
/**
 * Load the payload of a JSON cache file and report whether it is stale.
 *
 * If the file does not exist yet, it is created via write_db() from the
 * current $data (or an empty array) and $cachebreak is set. If the file
 * cannot be read or does not hold the expected structure, $data becomes an
 * empty array and $cachebreak is set, so callers rebuild the cache.
 *
 * @param string $db         Cache file name; may contain the placeholder "%id"
 * @param mixed  $data       In: initial content for a new cache file.
 *                           Out: the cached payload (the "toots" entry)
 * @param int    $cachetime  Maximum age of the cache in seconds
 * @param bool   $cachebreak Set to true if the cache is new, stale or
 *                           unusable; never reset to false here
 * @param mixed  $id         If truthy, replaces "%id" in $db
 */
function read_db($db, &$data, $cachetime, &$cachebreak, $id) {
    // if $id is given, it's a comments file. Replace placeholder in filename
    if ($id) {
        $db = str_replace('%id', $id, $db);
    }

    // if DB does not exist, create it
    if (! file_exists($db)) {
        // if $data empty (usually with $toots, not with comment's $result), populate with empty array
        if (empty($data)) {
            $data = array();
        }
        // $db is already resolved, so the placeholder replacement inside
        // write_db() has nothing left to replace
        write_db($db, $data, $id);
        $cachebreak = true;
    }

    $raw = is_readable($db) ? file_get_contents($db) : false;
    $decoded = ($raw === false) ? null : json_decode($raw, true);

    // unreadable, empty or damaged cache file: start over
    if (!is_array($decoded) || !array_key_exists('toots', $decoded)) {
        $cachebreak = true;
        $data = array();
        return;
    }

    // check if timestamp in cache file is missing, invalid or too old
    if (empty($decoded['timestamp']) || !is_numeric($decoded['timestamp'])
        || ($decoded['timestamp'] + $cachetime < time())) {
        $cachebreak = true;
    }

    // callers merge, iterate and count the payload, so always hand back an array
    $data = is_array($decoded['toots']) ? $decoded['toots'] : array();
}
|
|
|
|
/* TOOT FUNCTIONS */
|
|
/**
 * Fetch up to 50 statuses of an account that are newer than $min_id
 * (reblogs and replies excluded) and reduce each to id, date and matched URL.
 *
 * @param string $instance  Base URL of the Mastodon instance
 * @param mixed  $uid       Account ID on that instance
 * @param mixed  $min_id    Only statuses newer than this ID are requested
 * @param string $searchurl Passed on to analyzeToot() for every status
 * @return array List of ['id' => ..., 'date' => ..., 'url' => ...]; empty if
 *               the request fails or the response is not a list of statuses
 */
function collectToots($instance, $uid, $min_id, $searchurl) {
    $endpoint = "$instance/api/v1/accounts/" . rawurlencode((string)$uid)
        . "/statuses?exclude_reblogs=true&exclude_replies=true&limit=50&min_id=" . rawurlencode((string)$min_id);

    // best effort: a failed request is logged and treated as "no new toots"
    $raw = @file_get_contents($endpoint);
    if ($raw === false) {
        debug("Failed to fetch toots from API");
        return array();
    }

    $statuses = json_decode($raw, true);
    if (!is_array($statuses)) {
        debug("Unexpected response while fetching toots from API");
        return array();
    }

    $toots = array();
    foreach ($statuses as $toot) {
        // skip anything that is not a status, e.g. the members of an
        // {"error": "..."} object returned by the API
        if (!is_array($toot) || !isset($toot['id'], $toot['content'], $toot['created_at'])) {
            continue;
        }
        $url = analyzeToot($toot['content'], $toot['id'], $searchurl);
        $toots[] = array('id' => $toot['id'], 'date' => $toot['created_at'], 'url' => $url);
    }

    return $toots;
}
|
|
/**
 * Look for the searched URL inside a toot's content.
 *
 * @param string $content   Content of the toot
 * @param mixed  $id        Toot ID, used for the debug message only
 * @param string $searchurl URL prefix to look for
 * @return string The first match in lower case, or "" if there is none
 */
function analyzeToot($content, $id, $searchurl) {
    debug("Searching for $searchurl in $id");

    // Match $searchurl plus everything up to (and excluding) the next ",
    // i.e. up to the end of an attribute value such as an <a> tag's href.
    // NOTE(review): $searchurl goes into the pattern unescaped - presumably the
    // configured value may itself be a regular expression; confirm before
    // adding preg_quote().
    $pattern = "|$searchurl.+?(?=\")|i";
    preg_match($pattern, $content, $matches);

    // take first match inside toot
    return empty($matches) ? "" : strtolower($matches[0]);
}
|
|
/**
 * Copy the interesting parts of each reply into $result['comments'],
 * keyed by the reply's ID.
 *
 * @param array $descendants Replies as found in a toot's context
 * @param mixed $root        ID of the toot the context was requested for
 * @param array $result      Result structure, extended in place
 * @return array The updated $result
 */
function filterComments($descendants, $root, &$result) {
    foreach ($descendants as $reply) {
        $account = $reply['account'];

        // fall back to the username if no display name is set
        $author = [
            'display_name' => $account['display_name'] ?: $account['username'],
            'avatar' => $account['avatar_static'],
            'url' => $account['url'],
        ];

        $comment = [
            'author' => $author,
            'toot' => $reply['content'],
            'date' => $reply['created_at'],
            'url' => $reply['uri'],
            'reply_to' => $reply['in_reply_to_id'],
            'root' => $root,
        ];

        $result['comments'][$reply['id']] = $comment;
    }

    return $result;
}
|
|
/**
 * Fetch the replies (context) and the counters of a toot and store them in
 * $result - combined in one function to keep the API calls together.
 *
 * Both requests are best effort: a failed request or an unexpected response
 * is logged via debug() and leaves the corresponding part of $result as is.
 *
 * @param string $instance Base URL of the Mastodon instance
 * @param mixed  $id       ID of the toot
 * @param array  $result   Result structure; 'comments' and 'stats' are
 *                         filled in place
 */
function tootContextAndStats($instance, $id, &$result) {
    debug("Fetching context and stats for ID $id");

    // Fetch context (descendants/replies)
    $raw_context = @file_get_contents("$instance/api/v1/statuses/$id/context");
    if ($raw_context === false) {
        debug("Failed to fetch context for $id");
    } else {
        $json_context = json_decode($raw_context, true);
        // an error payload or invalid JSON has no list of descendants
        if (is_array($json_context) && isset($json_context['descendants']) && is_array($json_context['descendants'])) {
            filterComments($json_context['descendants'], $id, $result);
        } else {
            debug("Unexpected context response for $id");
        }
    }

    // Fetch stats
    $raw_stats = @file_get_contents("$instance/api/v1/statuses/$id");
    if ($raw_stats === false) {
        debug("Failed to fetch stats for $id");
    } else {
        $json_stats = json_decode($raw_stats, true);
        if (is_array($json_stats)) {
            // missing counters count as 0, a missing URL as empty string
            $result['stats']['reblogs'] = isset($json_stats['reblogs_count']) ? (int)$json_stats['reblogs_count'] : 0;
            $result['stats']['favs'] = isset($json_stats['favourites_count']) ? (int)$json_stats['favourites_count'] : 0;
            $result['stats']['replies'] = isset($json_stats['replies_count']) ? (int)$json_stats['replies_count'] : 0;
            $result['stats']['url'] = isset($json_stats['url']) ? $json_stats['url'] : '';
        } else {
            debug("Unexpected stats response for $id");
        }
    }
}
|
|
|
|
/***************
|
|
* START PROGRAM
|
|
***************/
|
|
|
|
// Blank response skeleton; also used later to reset $result
$result_empty = [
    'comments' => [],
    'stats' => ['reblogs' => 0, 'favs' => 0, 'replies' => 0, 'url' => '', 'root' => 0],
];
$result = $result_empty;

/* A toot_url in the request makes the search through the account's toots unnecessary */
$toot_instance = $instance; // the configured instance, unless the toot URL names another one
$id = null; // no toot known yet

if ($toot_url) {
    $parsed = parseTootUrl($toot_url);

    if (!$parsed) {
        debug("Failed to parse toot URL: $toot_url");
        $toot_url = null; // Fall back to search
    } else {
        $toot_instance = $parsed['instance'];
        $id = $parsed['id'];
        debug("Toot URL provided: $toot_url");
        debug("Extracted - Instance: $toot_instance, ID: $id");

        // Toots on another instance get a cache file name that includes a hash
        // of that instance, so equal IDs on different instances don't collide
        if ($toot_instance !== $instance) {
            $instance_hash = md5($toot_instance);
            $dbc = "cache-comments_%id-{$instance_hash}.json";
            debug("Using instance-specific cache file for non-default instance");
        }
    }
}
|
|
|
|
if (!$toot_url) {
    /* Load the cached list of toots; read_db() also tells us whether it is
       older than the max. cache time. A forced refresh always breaks the cache. */
    $cachebreak = $force_refresh;
    read_db($dbt, $toots, $ctt, $cachebreak, false);

    // a damaged cache file must not break array_merge()/foreach below
    if (!is_array($toots)) {
        $toots = array();
    }

    if ($cachebreak) {
        /* Collect all the toots, starting after the latest cached one */
        debug("Toots cache outdated. Checking for new toots");

        // newest toot comes first in the list; without cached toots start from the oldest toot
        $min_id = !empty($toots[0]['id']) ? $toots[0]['id'] : "0";

        /* Ask for toots newer than $min_id until a round brings nothing new */
        $uptodate = false;
        while ($uptodate === false) {
            $toots = array_merge(collectToots($instance, $uid, $min_id, $searchurl), $toots);

            // the latest ID after this round; if there are no toots at all,
            // keep $min_id instead of reading from an empty list
            $min_id_new = !empty($toots[0]['id']) ? $toots[0]['id'] : $min_id;

            if ($min_id_new === $min_id) {
                // min_id is the latest, let's write the new DB and end this loop
                $uptodate = true;
                debug("Toots up-to-date. Rewrite cache DB.");
                write_db($dbt, $toots, false);
            } else {
                // next round looks for toots newer than the newly found ID
                debug("Newer toots than in cache found. Starting another search for new toots");
                $min_id = $min_id_new;
            }
        }
    } else {
        debug("Toots cache is up-to-date");
    }

    /* check if URL from $search exists in $toots */
    $found_id = null;
    foreach ($toots as $toot) {
        if (!empty($toot['url']) && strpos($toot['url'], $search) !== false) {
            $found_id = $toot['id']; // will keep the oldest (last in array)
        }
    }

    if ($found_id === null) {
        debug("Blog URL \"$search\" has not been found");
    } else {
        $id = $found_id;
    }
}
|
|
|
|
/* With a toot ID at hand (from the toot URL or from the search), load its comments */
if ($id) {
    // load the cached comments into $result; $cachebreak tells whether they are too old
    $cachebreak = false;
    read_db($dbc, $result, $ctc, $cachebreak, $id);

    if (!$cachebreak) {
        debug("Comments cache for $id up-to-date. Returning cached comments");
    } else {
        debug("Comments cache for $id outdated. Checking for new comments");

        // drop the old cache file first, otherwise the stats would add up
        delete_db($dbc, $id);

        // start again from the empty template; read_db() re-creates the cache file
        $result = $result_empty;
        read_db($dbc, $result, $ctc, $cachebreak, $id);

        /* Extract comments and stats from toot */
        tootContextAndStats($toot_instance, $id, $result);

        // the reply counter is always the number of collected comments
        $result['stats']['replies'] = count($result['comments']);
        $result['stats']['root'] = $id;

        write_db($dbc, $result, $id);
    }
}
|
|
|
|
// Response headers: the result must not be cached, and the body is JSON
$response_headers = [
    'Cache-Control: no-cache, must-revalidate',
    'Expires: Mon, 26 Jul 1997 05:00:00 GMT',
    'Content-type: application/json',
];
foreach ($response_headers as $response_header) {
    header($response_header);
}

// pass the debug flag along for JavaScript console logging
$result['debug'] = $debug_on;

// output the result as JSON, to be digested by getcomments.js
$payload = json_encode($result);
echo $payload;
|
|
|
|
?>
|