feat: Add filter to canonicalize the format of timestamps and helper for timestamp conversion.

Signed-off-by: Sebastian Fey <info@sebastianfey.de>
This commit is contained in:
Sebastian Fey 2023-11-26 20:28:37 +01:00 коммит произвёл Christian Wolf
Родитель 79c3e2d5e6
Коммит 9872188c2a
4 изменённых файлов: 243 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,9 @@
<?php
namespace OCA\Cookbook\Exception;
class InvalidTimestampException extends \Exception {
public function __construct($message = '', $code = 0, $previous = null) {
parent::__construct($message, $code, $previous);
}
}

Просмотреть файл

@ -0,0 +1,67 @@
<?php
namespace OCA\Cookbook\Helper\Filter\JSON;
use OCA\Cookbook\Exception\InvalidTimestampException;
use OCA\Cookbook\Helper\Filter\AbstractJSONFilter;
use OCA\Cookbook\Helper\TimestampHelper;
use OCP\IL10N;
use Psr\Log\LoggerInterface;
/**
* Fix the timestamps of a recipe providing a canonical format.
*
* The filter works on the `dateCreated`, `dateModified`, and `datePublished` entries, individually.
*
* If a timestamp is not present or not able to read, it is set to null.
* The timestamp is parsed and reformatted as ISO 8601 including a timezone.
*/
class FixTimestampsFilter extends AbstractJSONFilter {
/** @var IL10N */
private $l;
/** @var LoggerInterface */
private $logger;
/** @var TimestampHelper */
private $timestampHelper;
public function __construct(
IL10N $l,
LoggerInterface $logger,
TimestampHelper $tsHelper
) {
$this->l = $l;
$this->logger = $logger;
$this->timestampHelper = $tsHelper;
}
public function apply(array &$json): bool {
$changed = false;
$this->fixTimestamp($json, 'dateCreated', $changed);
$this->fixTimestamp($json, 'dateModified', $changed);
$this->fixTimestamp($json, 'datePublished', $changed);
return $changed;
}
private function fixTimestamp(array &$json, string $type, bool &$changed): void {
if (!isset($json[$type])) {
$json[$type] = null;
$changed = true;
return;
}
$orig = $json[$type];
try {
$json[$type] = $this->timestampHelper->parseTimestamp($json[$type]);
} catch (InvalidTimestampException $ex) {
$json[$type] = null;
}
if ($orig !== $json[$type]) {
$changed = true;
}
}
}

Просмотреть файл

@ -15,6 +15,7 @@ class JSONFilter {
CleanCategoryFilter $cleanCategoryFilter,
FixRecipeYieldFilter $fixRecipeYieldFilter,
FixKeywordsFilter $fixKeywordsFilter,
FixTimestampsFilter $fixTimestampsFilter,
FixToolsFilter $fixToolsFilter,
FixIngredientsFilter $fixIngredientsFilter,
FixInstructionsFilter $fixInstructionsFilter,
@ -32,6 +33,7 @@ class JSONFilter {
$cleanCategoryFilter,
$fixRecipeYieldFilter,
$fixKeywordsFilter,
$fixTimestampsFilter,
$fixToolsFilter,
$fixIngredientsFilter,
$fixInstructionsFilter,

Просмотреть файл

@ -0,0 +1,165 @@
<?php
namespace OCA\Cookbook\Helper;
use DateTime;
use DateTimeImmutable;
use OCA\Cookbook\Exception\InvalidTimestampException;
use OCP\IL10N;
/**
* Parser for ISO8601 and other timestamps. Currently only ISO8601 is supported.
*/
class TimestampHelper {
/** @var IL10N */
private $l;
// Output format: Ignore fractions of a second
public const OUTPUT_FORMAT = 'Y-m-d\TH:i:sP';
public function __construct(IL10N $l) {
$this->l = $l;
}
/**
* Parse a string representation of a timestamp and format according to ISO8601.
* For reference, see <a href="https://en.wikipedia.org/wiki/ISO_8601">Wikipedia</a>.
*
* The output time span is in the form `YYYY-MM-DDThh:mm:ss±hh:mm`, e.g., `2023-11-25T14:25:36+01:00`.
* - YYYY - year
* - MM - month
* - DD - day
* - hh - hours
* - mm - minutes
* - ss - seconds
*
* The time before the ± defines the local time, the time after ± defines the timezone of the timestamp. In order to
* calculate the time in UTC, the value after ± needs to be added/subtracted from the local time.
*
* @param string $timestamp The timestamp to parse
* @return string The timestamp in ISO8601 format
* @throws InvalidTimestampException if the input data could not be parsed successfully.
*/
public function parseTimestamp(string $timestamp): string {
// For now, we only support the ISO8601 format because it is required in the schema.org standard
try {
return $this->parseIsoFormat($timestamp);
} catch (InvalidTimestampException) {
// We do nothing here. Check the next format
}
// No more formats are available.
throw new InvalidTimestampException($this->l->t('Could not parse timestamp {timestamp}', ['timestamp' => $timestamp]));
}
/**
* Parses the string $timestamp and checks if it has a valid ISO 8601 format. Otherwise, throws
* InvalidTimestampException.
*
* For reference of ISO 8601, see <a href="https://en.wikipedia.org/wiki/ISO_8601">Wikipedia</a>.
* @param string $timestamp Timestamp to be parsed.
* @return string
* @throws InvalidTimestampException if $timestamp does not comply to ISO 8601.
*/
private function parseIsoFormat(string $timestamp): string {
try {
return $this->parseIsoCalendarDateFormat($timestamp, '-');
} catch (InvalidTimestampException) { // Check next format
}
try {
return $this->parseIsoCalendarDateFormat($timestamp, '');
} catch (InvalidTimestampException) { // Check next format
}
try {
return $this->parseIsoWeekDateFormat($timestamp, '-');
} catch (InvalidTimestampException) { // Check next format
}
return $this->parseIsoWeekDateFormat($timestamp, '');
}
/**
* Parses the string $timestamp and checks if it has a valid ISO 8601 with date as year, month, and day.
* Otherwise, throws InvalidTimestampException.
*
* For reference of ISO 8601, see <a href="https://en.wikipedia.org/wiki/ISO_8601">Wikipedia</a>.
* @param string $timestamp Timestamp to be parsed
* @param string $dateSeparator Separator to be used between `YYYY`, `mm`, and `dd`.
* @return string
* @throws InvalidTimestampException if $timestamp does not comply to ISO 8601 with week and weekday.
*/
private function parseIsoCalendarDateFormat(string $timestamp, string $dateSeparator = '-'): string {
$date = "Y".$dateSeparator."m".$dateSeparator."d";
return $this->parseIsoTimestampWithTimeFormats($timestamp, $date);
}
/**
* Parses the string $timestamp and checks if it has a valid ISO 8601 format with date defined as week and weekday.
* Otherwise, throws InvalidTimestampException.
*
* For reference of ISO 8601, see <a href="https://en.wikipedia.org/wiki/ISO_8601">Wikipedia</a>.
* @param string $timestamp Timestamp to be parsed
* @param string $dateSeparator Separator to be used between `YYYY`, `mm`, and `dd`.
* @return string
* @throws InvalidTimestampException if $timestamp does not comply to ISO 8601 with week and weekday.
*/
private function parseIsoWeekDateFormat(string $timestamp, string $dateSeparator = '-'): string {
$pattern = "/^(?!$)(\d\d\d\d)" . $dateSeparator . "W(\d\d)" . $dateSeparator . "(\d)T.*$/";
$ret = preg_match($pattern, trim($timestamp), $matches);
if ($ret === 1) {
// Convert week format to calendar format for date
$tmpDate = new DateTime('midnight');
// $matches[0] is the complete string
// $matches[1] to $matches[3] is year, week, weekday
$tmpDate->setISODate((int)$matches[1], (int)$matches[2], (int)$matches[3]);
$tmpDateString = $tmpDate->format('Y-m-d\TH:i:sP');
// Combine converted date with original time
$timePartOfTimestamp = substr($timestamp, 8 + 2 * mb_strlen($dateSeparator));
$datePartOfConvertedTimestamp = substr($tmpDateString, 0, 10);
$updatedTimestamp = $datePartOfConvertedTimestamp . $timePartOfTimestamp;
// Parse complete date including time
$dateFormat = 'Y-m-d';
return $this->parseIsoTimestampWithTimeFormats($updatedTimestamp, $dateFormat);
}
throw new InvalidTimestampException($this->l->t('Could not parse timestamp {timestamp}', ['timestamp' => $timestamp]));
}
/**
* Parses the string $timestamp and checks if it has a valid ISO 8601. Uses the date format given by $dateFormat
* and checks several allowed formats for the time.
*
* For reference of ISO 8601, see <a href="https://en.wikipedia.org/wiki/ISO_8601">Wikipedia</a>.
* @param string $timestamp Timestamp to be parsed
* @param string $dateFormat Format to be used for the date portion
* @return string
* @throws InvalidTimestampException if $timestamp does not comply to any of the checked formats allowed by ISO 8601.
*/
private function parseIsoTimestampWithTimeFormats(string $timestamp, string $dateFormat): string {
// Try parsing timestamp without milliseconds
$dt = DateTimeImmutable::createFromFormat($dateFormat . "\\TH:i:sP", $timestamp);
if($dt) {
return $dt->format(self::OUTPUT_FORMAT);
}
// Try parsing timestamp with dot-separated milliseconds
$dt = DateTimeImmutable::createFromFormat($dateFormat . "\\TH:i:s.vP", $timestamp);
if($dt) {
return $dt->format(self::OUTPUT_FORMAT);
}
// Try parsing timestamp with comma-separated milliseconds
$dt = DateTimeImmutable::createFromFormat($dateFormat . "\\TH:i:s,vP", $timestamp);
if($dt) {
return $dt->format(self::OUTPUT_FORMAT);
}
throw new InvalidTimestampException($this->l->t('Could not parse timestamp {timestamp}', ['timestamp' => $timestamp]));
}
}