From 41d896ba3e5b0ea3ff2890eb34fe8528ecfacfa1 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 22 Jan 2024 17:52:14 +0100 Subject: [PATCH] (converter) Refactor content type check in PlainTextDocumentProcessorPlugin The method `isApplicable` in the `PlainTextDocumentProcessorPlugin` was refactored to handle a wider range of content types beyond merely "text/plain". It now also handles any content type that starts with "text/plain;", to accommodate contentTypes that append a charset as well. --- .../plugin/PlainTextDocumentProcessorPlugin.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java index 7bb94eac..787cc8a0 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java @@ -54,7 +54,14 @@ public class PlainTextDocumentProcessorPlugin extends AbstractDocumentProcessorP @Override public boolean isApplicable(CrawledDocument doc) { - return doc.contentType.equalsIgnoreCase("text/plain"); + String contentType = doc.contentType.toLowerCase(); + + if (contentType.equals("text/plain")) + return true; + if (contentType.startsWith("text/plain;")) // charset=blabla + return true; + + return false; } @Override