diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java b/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java
index 8fdbabaf..3e9005bc 100644
--- a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java
+++ b/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java
@@ -3,11 +3,8 @@ package nu.marginalia.integration.stackexchange.sqlite;
import com.github.luben.zstd.Zstd;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
-import gnu.trove.map.TIntIntMap;
-import gnu.trove.map.hash.TIntIntHashMap;
import lombok.SneakyThrows;
import nu.marginalia.integration.stackexchange.xml.StackExchangeXmlPostReader;
-import org.apache.commons.compress.compressors.zstandard.ZstdUtils;

import javax.xml.stream.XMLStreamException;
import java.io.IOException;
@@ -22,8 +19,19 @@ import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;
import java.util.function.Predicate;

+/** Because stackexchange's XML format is a stream of entities that reference their parent,
+ * and we want to process them in a thread-by-thread order, it is necessary to use something
+ * to essentially re-order the data.
+ * <p>
+ * This class uses SQLite to perform this task. The actual post bodies are compressed to keep
+ * the size of the file down. It is strongly advisable to read these database files off an SSD
+ * rather than a mechanical hard drive; the difference in processing time is 20 minutes
+ * vs 6+ hours.
+ *
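+ * <p>
+ * A minimal usage sketch (illustrative only; the exact consumer type taken by forEachPost is
+ * not visible in this diff and is assumed to behave like a predicate that returns true to
+ * keep iterating):
+ * <pre>{@code
+ * StackExchangePostsDb.create(Path.of("se.db"), Path.of("stackoverflow.com.7z"));
+ * StackExchangePostsDb.forEachPost(Path.of("se.db"), post -> {
+ *     // ... convert or index the post ...
+ *     return true; // assumed: true means "keep going"
+ * });
+ * }</pre>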
+ */
public class StackExchangePostsDb {
+ /** Construct a SQLite file containing the posts in the stackexchange-style 7z file */
@SneakyThrows
public static void create(Path sqliteFile,
Path stackExchange7zFile) {
@@ -62,6 +70,8 @@ public class StackExchangePostsDb {
var post = iter.next();
insertPost.setInt(1, post.id());
+ // We invent a new field called threadId, which is the id of the post if it's
+ // a question, or the id of its parent if it's an answer
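+ // e.g. (illustrative): a question with id 42 gets threadId 42, and an answer with id 57
+ // whose parentId is 42 also gets threadId 42, so a whole thread shares one threadId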
if (post.parentId() == null) insertPost.setInt(2, post.id());
else insertPost.setInt(2, post.parentId());
@@ -92,6 +102,11 @@ public class StackExchangePostsDb {
}
}
+ /** Iterate over each post in the SQLite post database.
+ * Each post will be assigned an ordinal number that is distinct from the post's id. This is
+ * necessary because stackexchange's entry count exceeds the ~67 million entries that UrlIdCodec
+ * can encode for a single domain, even though there are fewer than 67 million 'threads'.
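+ * <p>
+ * Illustration with made-up counts (assuming, from the loop below, that the ordinal advances
+ * as threads are read back rather than per raw post id): a dump of 80 million posts spread over
+ * 40 million threads stays comfortably inside the range UrlIdCodec can address for one domain.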
+ */
@SneakyThrows
public static void forEachPost(
Path sqliteFile,
@@ -108,20 +123,24 @@ public class StackExchangePostsDb {
WHERE threadId = ?
""")
) {
+
+ // Step 1 is to export a list of thread IDs from the database
TIntList threadIds = new TIntArrayList(10_000);
ResultSet rs = selectThreadIds.executeQuery();
-
while (rs.next()) {
threadIds.add(rs.getInt(1));
}
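+ // threadIds now holds the id of every thread in the database; each one drives a
+ // per-thread query below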
System.out.println("Got " + threadIds.size() + " IDs");
+ // Step 2: Iterate over each thread
var idIterator = threadIds.iterator();
int ordinal = 0;
-
while (idIterator.hasNext()) {
- queryPostContents.setInt(1, idIterator.next());
+ int threadId = idIterator.next();
+
+ // Query posts with this threadId
+ queryPostContents.setInt(1, threadId);
rs = queryPostContents.executeQuery();
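+ // Gather this thread's posts from the result set (presumably so the whole thread can be
+ // handed off to the caller as one unit)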
List