diff options
| author | Benjamin Culkin <scorpress@gmail.com> | 2026-03-04 20:15:50 -0500 |
|---|---|---|
| committer | Benjamin Culkin <scorpress@gmail.com> | 2026-03-04 20:15:50 -0500 |
| commit | c6d435b3f4f8456fb2aa8fc4b4a6941d13974991 (patch) | |
| tree | c507ec63ce36152ab5959cce14d4b62806908aeb | |
| parent | dde33dd8a8abb5418a04120e70da7998bc2e069c (diff) | |
This reorganizes some of the code that will be used for parsing
messages, and moves the loading of messages from JSON onto a
SwingWorker.
14 files changed, 835 insertions, 427 deletions
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/GPTConversationDB.java b/firmal/src/main/java/bjc/firmal/gptbrowser/GPTConversationDB.java index 14f9055..6041d1d 100644 --- a/firmal/src/main/java/bjc/firmal/gptbrowser/GPTConversationDB.java +++ b/firmal/src/main/java/bjc/firmal/gptbrowser/GPTConversationDB.java @@ -7,12 +7,15 @@ import java.util.List; import java.util.Set; import bjc.data.Pair; +import bjc.firmal.gptbrowser.msginfo.ParsedMessage; +import bjc.firmal.gptbrowser.msginfo.RawMessageDB; public class GPTConversationDB { private String id; private String title; - private List<RawMessageDB> messages; + private List<RawMessageDB> rawmessages; + private List<ParsedMessage> parsedMessages; private Set<String> messagesSeen; @@ -30,18 +33,18 @@ public class GPTConversationDB { this.title = title; } - public void addMessage(RawMessageDB message) { - messages.add(message); + public void addRawMessage(RawMessageDB message) { + rawmessages.add(message); messagesSeen.add(message.getMessageID()); // TODO check if there should be a warning for adding a message // that has a different conversation ID than this conversation. 
} - public List<RawMessageDB> getMessages() { - return messages; + public List<RawMessageDB> getRawMessages() { + return rawmessages; } - public boolean hasSeenMessage(String msgID) { + public boolean hasSeenRawMessage(String msgID) { return messagesSeen.contains(msgID); } @@ -49,7 +52,7 @@ public class GPTConversationDB { this.id = id; this.title = title; - this.messages = new ArrayList<>(); + this.rawmessages = new ArrayList<>(); this.messagesSeen = new HashSet<>(); - } + } } diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/GPTJSONBrowserFrame.java b/firmal/src/main/java/bjc/firmal/gptbrowser/GPTJSONBrowserFrame.java index 879d636..7006d91 100644 --- a/firmal/src/main/java/bjc/firmal/gptbrowser/GPTJSONBrowserFrame.java +++ b/firmal/src/main/java/bjc/firmal/gptbrowser/GPTJSONBrowserFrame.java @@ -1,444 +1,40 @@ package bjc.firmal.gptbrowser; import java.awt.BorderLayout; -import java.awt.Dialog.ModalityType; -import java.awt.event.ActionEvent; -import java.awt.event.ActionListener; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; import javax.swing.DefaultListModel; -import javax.swing.JButton; import javax.swing.JDesktopPane; -import javax.swing.JDialog; -import javax.swing.JFileChooser; -import javax.swing.JFrame; import javax.swing.JInternalFrame; -import javax.swing.JLabel; import javax.swing.JList; import javax.swing.JMenu; import javax.swing.JMenuBar; import javax.swing.JMenuItem; -import javax.swing.JPanel; -import javax.swing.JProgressBar; import javax.swing.JScrollPane; import javax.swing.JSplitPane; import javax.swing.JTextPane; import javax.swing.ListSelectionModel; -import javax.swing.SwingUtilities; -import 
javax.swing.SwingWorker; -import org.json.JSONObject; -import org.json.JSONTokener; - -import bjc.firmal.Firmal; -import bjc.functypes.ClosableThrowFunction; +import bjc.firmal.gptbrowser.msginfo.RawMessageDB; import bjc.utils.gui.DelegateListCellRenderer; import bjc.utils.gui.JsonEditorKit; -import bjc.utils.gui.panels.BatchTaskProgressPanel.BatchHandle; -import bjc.utils.misc.NamedPreparedStatement; import bjc.utils.misc.SmartJSONFormatter; -import org.json.JSONArray; - /** * UI frame for the GPT browser */ public class GPTJSONBrowserFrame { - private final static int BATCH_THRESHOLD = 500; + /** + * The threshold for performing batch commits at + */ + public final static int BATCH_THRESHOLD = 500; // Conversation data private JList<GPTConversationDB> conversationListUI; - private DefaultListModel<GPTConversationDB> conversationListModel; + DefaultListModel<GPTConversationDB> conversationListModel; /** - * Worker task to save a conversation to the DB + * Construct a new browser frame */ - private final class SaveConversationTask extends SwingWorker<Void, Integer> { - private GPTConversationDB conversation; - private ClosableThrowFunction<List<NamedPreparedStatement.Args>, CompletableFuture<List<Integer>>, SQLException> insertConvFunc; - private ClosableThrowFunction<List<NamedPreparedStatement.Args>, CompletableFuture<List<Integer>>, SQLException> insertMessageFunc; - - private String note; - private CountDownLatch msgCounter; - - /** - * Create a DB save worker task - * @param conv The conversation to save - * @param convUpdate The DB function for conversation updates - * @param msgUpdate The DB function for message updates - * @param msgCounter The message counter - */ - public SaveConversationTask(GPTConversationDB conv, - ClosableThrowFunction<List<NamedPreparedStatement.Args>, CompletableFuture<List<Integer>>, SQLException> convUpdate, - ClosableThrowFunction<List<NamedPreparedStatement.Args>, CompletableFuture<List<Integer>>, SQLException> msgUpdate, - 
CountDownLatch msgCounter) { - this.conversation = conv; - this.insertConvFunc = convUpdate; - this.insertMessageFunc = msgUpdate; - this.msgCounter = msgCounter; - } - - @Override - protected Void doInBackground() throws Exception { - List<NamedPreparedStatement.Args> insertConvArgs = new ArrayList<>(1); - - NamedPreparedStatement.Args.Builder insertConvRecord = NamedPreparedStatement.Args.builder(); - insertConvRecord.setString("id", conversation.getID()); - insertConvRecord.setString("title", conversation.getTitle()); - insertConvArgs.add(insertConvRecord.build()); - - List<RawMessageDB> messages = conversation.getMessages(); - List<NamedPreparedStatement.Args> insertMessageArgs = new ArrayList<>(messages.size()); - - int totalNumMessages = messages.size(); - int currNumMessages = 0; - - for (RawMessageDB message : messages) { - NamedPreparedStatement.Args.Builder insertMessageRecord = NamedPreparedStatement.Args.builder(); - - if (isCancelled()) break; - - int currProgress = Math.min(100, (currNumMessages / totalNumMessages) * 100); - setProgress(currProgress); - - String parentID = message.getParentMessageID(); - if (conversation.hasSeenMessage(parentID)) { - String newNote = "Saving message " + currNumMessages + " of " + totalNumMessages; - firePropertyChange("note", note, newNote); - note = newNote; - - - insertMessageRecord.setString("selfid", message.getMessageID()); - insertMessageRecord.setString("convid", message.getConversationID()); - insertMessageRecord.setString("body", message.getMessageBody()); - // TODO figure out why we are getting constraint violations here. - // Do we really need to leave this null initially, then backfill it? - // Or do we need to be doing these as independent DB queries instead of batching them? 
- // That sounds rather inefficient, but so is doing a second pass to fill it later - // insertMessage.setString("parentid", message.getParentMessageID()); - insertMessageArgs.add(insertMessageRecord.build()); - } else { - // TODO: this message has a missing/incorrect parent link - } - } - - String newNote = "Starting save of " + BATCH_THRESHOLD + " messages/conversations to the DB"; - firePropertyChange("note", note, newNote); - note = newNote; - - // Save our changes for this conversation - Future<List<Integer>> insertConversationResults = insertConvFunc.apply(insertConvArgs); - - for (int i : insertConversationResults.get()) { - if (i != 0 && i != 1) { - // TODO: do something about an oddity - } - } - - Future<List<Integer>> insertMessageResults = insertMessageFunc.apply(insertMessageArgs); - for (int i : insertMessageResults.get()) { - if (i != 0 && i != 1) { - // TODO handle oddities - } - } - - newNote = "Saved " + BATCH_THRESHOLD + " messages/conversations to the DB"; - firePropertyChange("note", note, newNote); - - - return null; - } - - @Override - protected void done() { - msgCounter.countDown(); - } - - public String getNote() { - return note; - } - public void setNote(String note) { - this.note = note; - } - } - - private final class SaveConversationToDBListener implements ActionListener { - private final JDesktopPane deskPane; - - private SaveConversationToDBListener(JDesktopPane deskPane) { - this.deskPane = deskPane; - } - - @Override - public void actionPerformed(ActionEvent aev) { - Firmal fm = Firmal.fm; - - NamedPreparedStatement.Args.Builder insertConvShape = NamedPreparedStatement.Args.builder(); - insertConvShape.setString("id", null); - insertConvShape.setString("title", null); - - NamedPreparedStatement.Args.Builder insertMessageShape = NamedPreparedStatement.Args.builder(); - insertMessageShape.setString("selfid", null); - insertMessageShape.setString("convid", null); - insertMessageShape.setString("body", null); - - try { - var 
insertConversation = fm.createQueuedUpdater( - "insert into chatgpt.conversations (conversation_id, conversation_title)" - + " values (:id::uuid, :title) on conflict (conversation_id) do nothing", insertConvShape.build()); - var insertMessage = fm.createQueuedUpdater( - "insert into chatgpt.raw_messages (message_id, conversation_id, message_body) " - + "values (:selfid::uuid, :convid::uuid, :body::json)" - + " on conflict (message_id) do nothing", insertMessageShape.build()); - - Iterator<GPTConversationDB> conversations = conversationListModel.elements().asIterator(); - - BatchHandle saveBatch = fm.createTaskBatch("Save Raw ChatGPT Conversations to DB"); - - int totalConversations = conversationListModel.getSize(); - int currConversation = 0; - - CountDownLatch msgCounter = new CountDownLatch(totalConversations); - - while (conversations.hasNext()) { - currConversation++; - - GPTConversationDB conversation = conversations.next(); - - SaveConversationTask saveTask = new SaveConversationTask(conversation, insertConversation, insertMessage, msgCounter); - String taskDesc = "Saving conversation " + currConversation + " of " + totalConversations + ": " + conversation.getTitle(); - saveBatch.monitorSwingWorker(saveTask, taskDesc, true); - - saveTask.execute(); - } - - // Make sure our statements are cleaned up once we are done - Thread cleanupThread = new Thread(() -> { - try { - msgCounter.await(); - - insertMessage.close(); - insertConversation.close(); - } catch (InterruptedException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - }); - cleanupThread.start(); - } catch (SQLException sqlex) { - JFrame mainFrame = null; - JDialog errorDialog = new JDialog(mainFrame, "Error interfacing with DB"); - - JLabel headerLabel = new JLabel("Error interfacing with database"); - - JLabel errorDetails = new JLabel(sqlex.getLocalizedMessage()); - - JButton okButton = new 
JButton("OK"); - okButton.addActionListener((aev2) -> { - errorDialog.dispose(); - }); - - errorDialog.add(BorderLayout.PAGE_START, headerLabel); - errorDialog.add(BorderLayout.PAGE_END, okButton); - errorDialog.add(BorderLayout.CENTER, errorDetails); - - errorDialog.pack(); - errorDialog.setVisible(true); - } catch (Exception e1) { - // TODO Auto-generated catch block - e1.printStackTrace(); - } - } - } - - // TODO update this to use the new SwingWorker / BatchTaskProgressPanel infrastructure - private static final class LoadGPTJSONListener implements ActionListener { - public static enum ParseMode { - /** Capture the whole JSON - don't parse it further. */ - RAW, - /** Parse all of the JSON as completely as possible and store it that way. */ - COMPLETE, - /** - * Parse and store only enough of the JSON to provide the user-visible output. - * - * This skips the thought-records, turn summaries and other associated metadata - */ - EXPORT - } - - private final JInternalFrame newFrame; - private DefaultListModel<GPTConversationDB> listModel; - - private LoadGPTJSONListener(JInternalFrame newFrame, DefaultListModel<GPTConversationDB> listModel) { - this.newFrame = newFrame; - this.listModel = listModel; - } - - @Override - public void actionPerformed(ActionEvent aev) { - JFileChooser chooser = new JFileChooser(); - int openResults = chooser.showOpenDialog(newFrame); - if (openResults != JFileChooser.APPROVE_OPTION) - return; - - List<GPTConversationDB> conversationList = new ArrayList<>(); - try (FileReader fr = new FileReader(chooser.getSelectedFile())) { - JSONTokener loader = new JSONTokener(fr); - - JSONArray conversations = new JSONArray(loader); - - int numConversations = conversations.length(); - for (int i = 0; i < numConversations; i++) { - JSONObject conversation = conversations.optJSONObject(i); - if (conversation == null) { - // Blank conversation? 
- continue; - } - - GPTConversationDB parsedConversation = parseConversationRaw(conversation); - conversationList.add(parsedConversation); - listModel.addElement(parsedConversation); - } - } catch (FileNotFoundException fnfex) { - // TODO: better error handling - fnfex.printStackTrace(); - } catch (IOException ioex) { - ioex.printStackTrace(); - } - - // TODO present conversations via UI - // TODO provide import from DB via JDBC - // TODO finalize the primary parsing mode - complete - } - - - private GPTConversationDB parseConversationRaw(JSONObject conversation) { - return parseConversation(conversation, ParseMode.RAW); - } - - private GPTConversationDB parseConversation(JSONObject conversation, ParseMode mode) { - - String title = conversation.optString("title", "Untitled"); - String id = conversation.getString("id"); - - GPTConversationDB dbConversation = new GPTConversationDB(id, title); - - JSONObject mappings = conversation.getJSONObject("mapping"); - Iterator<String> mappingKeys = mappings.keys(); - - while (mappingKeys.hasNext()) { - String mappingKey = mappingKeys.next(); - JSONObject mapping = mappings.getJSONObject(mappingKey); - - /* These messages possibly need to be organized into a tree using the 'parent' / 'children' fields. - Might actually be easier to just store it in the DB and then reconstruct the tree from that - info later instead of trying to construct it fully in memory - */ - // Note on the tree thing: What we actually want to do is condense singular tree levels - - if (mapping.opt("message") == null) { - // No message, can ignore for now - continue; - } - - if (mode == ParseMode.RAW) { - String selfID = mappingKey; - String parentID = mapping.optString("parent", ""); - String rawMessage = mapping.toString(); - - RawMessageDB dbMessage = new RawMessageDB(selfID, id, rawMessage, parentID); - dbConversation.addMessage(dbMessage); - - continue; - } - // Also to consider, do we want to just store the raw messages into the DB? 
- // that will allow us to go back and re-parse them later - String selfID = mappingKey; - String parentID = mapping.optString("parent", ""); - // Consider if we should read the children - - JSONObject message = mapping.getJSONObject("message"); - - // A field to possibly read is 'recipient' and/or 'channel' which may do a better job - // at ID-ing messages that don't need to be visually shown - JSONObject messageMetadata = message.optJSONObject("metadata"); - if (messageMetadata == null - || messageMetadata.optBoolean("is_visually_hidden_from_conversation", false) == false) { - // Hidden message, skip - // NOTE: there do appear to be certain message that we may want to keep regardless - continue; - } - // Consider if we should grab turn_summary from the metadata - // Also, metadata has branching_from_conversation_id, branching_from_conversation_title etc - - // In metadata, we also have the attachments object, which contains info about attachments - - // Metadata also has the 'reasoning_status' / 'message_type' fields, but only some of the time. - - // Another metadata field is 'aggregate_result' which seems tied to code output in various ways - JSONObject authorData = message.getJSONObject("author"); - - String author = authorData.getString("role"); - // If the author is 'tool', that may need to get handled specially - JSONObject messageContent = message.getJSONObject("content"); - - String contentType = messageContent.getString("content_type"); - switch (contentType) { - // Should contentType get enum-ified? 
- case "text": { - // Text to integrate - // NOTE: thing to consider later is that a decent chunk of these text files are markdown - // and will need to be displayed that way - - // Also, text that is sent to python can be code that is executed - StringBuilder content = new StringBuilder(); - JSONArray messageParts = messageContent.getJSONArray("parts"); - int numParts = messageParts.length(); - for (int j = 0; j < numParts; j++) { - // I think we can just collate directly for this content type, but I'm not - // convinced it is the right behavior - content.append(messageParts.getString(j)); - } - } - case "reasoning_recap": - case "thoughts": - // Consider tying these to their associated message - // GPT metadata, worth recording, but not exposing - // Notably, `thoughts` seems like it might vary in form depending - // on the model or time of use - case "code": - // This contains code, and will likely need post-processing - // There is the 'language' tag for ID'ing languages - case "execution_output": - // This is also tied to code - default: - // Unknown content type - } - - // NOTE: Given some of the stuff, we probably need a fuller abstraction for this. 
- // The current one is Conversation -> Pairs of User/Assistant message - // However, we probably want to use distinct types for those so that we can properly - // associate all of the provided metadata to it - } - - return dbConversation; - } - } - public GPTJSONBrowserFrame() { conversationListModel = new DefaultListModel<>(); conversationListUI = new JList<>(conversationListModel); @@ -447,6 +43,12 @@ public class GPTJSONBrowserFrame { conversationListUI.setCellRenderer(new DelegateListCellRenderer<GPTConversationDB>(GPTConversationDB::getTitle)); } + /** + * Create a new GUI instance of this frame + * + * @param deskPane The desktop pane this frame will go into + * @return The GUI instance for this frame + */ public static JInternalFrame makeGPTJSONBrowserFrame(JDesktopPane deskPane) { GPTJSONBrowserFrame frame = new GPTJSONBrowserFrame(); @@ -468,12 +70,12 @@ public class GPTJSONBrowserFrame { JMenu fileMenu = new JMenu("File"); JMenuItem saveConversations = new JMenuItem("Save Conversations..."); - saveConversations.addActionListener(new SaveConversationToDBListener(deskPane)); + saveConversations.addActionListener(new SaveConversationToDBListener(this, deskPane)); fileMenu.add(saveConversations); JMenuItem loadConversations = new JMenuItem("Load Conversations..."); loadConversations.addActionListener((aev) -> { - + // TODO write me :) }); fileMenu.add(loadConversations); @@ -518,7 +120,7 @@ public class GPTJSONBrowserFrame { conversationMessageModel.clear(); - for (RawMessageDB rawMessage : conversation.getMessages()) { + for (RawMessageDB rawMessage : conversation.getRawMessages()) { conversationMessageModel.addElement(rawMessage); } }); diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/LoadGPTJSONListener.java b/firmal/src/main/java/bjc/firmal/gptbrowser/LoadGPTJSONListener.java new file mode 100644 index 0000000..853ffc9 --- /dev/null +++ b/firmal/src/main/java/bjc/firmal/gptbrowser/LoadGPTJSONListener.java @@ -0,0 +1,219 @@ +package 
bjc.firmal.gptbrowser; + +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import javax.swing.DefaultListModel; +import javax.swing.JFileChooser; +import javax.swing.JInternalFrame; +import javax.swing.SwingUtilities; +import javax.swing.SwingWorker; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.json.JSONTokener; + +import bjc.firmal.Firmal; +import bjc.firmal.gptbrowser.msginfo.RawMessageDB; +import bjc.utils.gui.panels.BatchTaskProgressPanel.BatchHandle; +import bjc.utils.gui.panels.BatchTaskProgressPanel.TaskHandle; + +final class LoadGPTJSONListener implements ActionListener { + private final class LoadGPTJSONWorker extends SwingWorker<Void, Integer> { + private final File selectedFile; + private final List<GPTConversationDB> conversationList; + + private LoadGPTJSONWorker(File selectedFile, List<GPTConversationDB> conversationList) { + this.selectedFile = selectedFile; + this.conversationList = conversationList; + } + + public Void doInBackground() { + try (FileReader fr = new FileReader(selectedFile)) { + JSONTokener loader = new JSONTokener(fr); + + JSONArray conversations = new JSONArray(loader); + + int numConversations = conversations.length(); + for (int i = 0; i < numConversations; i++) { + JSONObject conversation = conversations.optJSONObject(i); + if (conversation == null) { + // Blank conversation? 
+ continue; + } + + GPTConversationDB parsedConversation = parseConversationRaw(conversation); + conversationList.add(parsedConversation); + SwingUtilities.invokeLater(() -> listModel.addElement(parsedConversation)); + + int currProgress = Math.min(100, (int) (((double)i / (double)numConversations) * 100)); + setProgress(currProgress); + } + } catch (FileNotFoundException fnfex) { + // TODO: better error handling + fnfex.printStackTrace(); + } catch (IOException ioex) { + ioex.printStackTrace(); + } + + return null; + } + } + + public static enum ParseMode { + /** Capture the whole JSON - don't parse it further. */ + RAW, + /** Parse all of the JSON as completely as possible and store it that way. */ + COMPLETE, + /** + * Parse and store only enough of the JSON to provide the user-visible output. + * + * This skips the thought-records, turn summaries and other associated metadata + */ + EXPORT + } + + private final JInternalFrame newFrame; + private DefaultListModel<GPTConversationDB> listModel; + + LoadGPTJSONListener(JInternalFrame newFrame, DefaultListModel<GPTConversationDB> listModel) { + this.newFrame = newFrame; + this.listModel = listModel; + } + + @Override + public void actionPerformed(ActionEvent aev) { + JFileChooser chooser = new JFileChooser(); + int openResults = chooser.showOpenDialog(newFrame); + if (openResults != JFileChooser.APPROVE_OPTION) + return; + + List<GPTConversationDB> conversationList = new ArrayList<>(); + File selectedFile = chooser.getSelectedFile(); + + BatchHandle bgBatch = Firmal.fm.createTaskBatch("Load conversations from " + selectedFile.getName()); + SwingWorker<Void, Integer> bgWorker = new LoadGPTJSONWorker(selectedFile, conversationList); + @SuppressWarnings("unused") + TaskHandle bgHandle = bgBatch.monitorSwingWorker(bgWorker, "Loading conversations", false); + bgWorker.execute(); + } + + private GPTConversationDB parseConversationRaw(JSONObject conversation) { + return parseConversation(conversation, ParseMode.RAW); + } + 
+ private GPTConversationDB parseConversation(JSONObject conversation, ParseMode mode) { + + String title = conversation.optString("title", "Untitled"); + String id = conversation.getString("id"); + + GPTConversationDB dbConversation = new GPTConversationDB(id, title); + + JSONObject mappings = conversation.getJSONObject("mapping"); + Iterator<String> mappingKeys = mappings.keys(); + + while (mappingKeys.hasNext()) { + String mappingKey = mappingKeys.next(); + JSONObject mapping = mappings.getJSONObject(mappingKey); + + /* These messages possibly need to be organized into a tree using the 'parent' / 'children' fields. + Might actually be easier to just store it in the DB and then reconstruct the tree from that + info later instead of trying to construct it fully in memory + */ + // Note on the tree thing: What we actually want to do is condense singular tree levels + + if (mapping.opt("message") == null) { + // No message, can ignore for now + continue; + } + + if (mode == ParseMode.RAW) { + String selfID = mappingKey; + String parentID = mapping.optString("parent", ""); + String rawMessage = mapping.toString(); + + RawMessageDB dbMessage = new RawMessageDB(selfID, id, rawMessage, parentID); + dbConversation.addRawMessage(dbMessage); + + continue; + } + // Also to consider, do we want to just store the raw messages into the DB? 
+ // that will allow us to go back and re-parse them later + String selfID = mappingKey; + String parentID = mapping.optString("parent", ""); + // Consider if we should read the children + + JSONObject message = mapping.getJSONObject("message"); + + // A field to possibly read is 'recipient' and/or 'channel' which may do a better job + // at ID-ing messages that don't need to be visually shown + JSONObject messageMetadata = message.optJSONObject("metadata"); + if (messageMetadata == null + || messageMetadata.optBoolean("is_visually_hidden_from_conversation", false) == false) { + // Hidden message, skip + // NOTE: there do appear to be certain message that we may want to keep regardless + continue; + } + // Consider if we should grab turn_summary from the metadata + // Also, metadata has branching_from_conversation_id, branching_from_conversation_title etc + + // In metadata, we also have the attachments object, which contains info about attachments + + // Metadata also has the 'reasoning_status' / 'message_type' fields, but only some of the time. + + // Another metadata field is 'aggregate_result' which seems tied to code output in various ways + JSONObject authorData = message.getJSONObject("author"); + + String author = authorData.getString("role"); + // If the author is 'tool', that may need to get handled specially + JSONObject messageContent = message.getJSONObject("content"); + + String contentType = messageContent.getString("content_type"); + switch (contentType) { + // Should contentType get enum-ified? 
+ case "text": { + // Text to integrate + // NOTE: thing to consider later is that a decent chunk of these text files are markdown + // and will need to be displayed that way + + // Also, text that is sent to python can be code that is executed + StringBuilder content = new StringBuilder(); + JSONArray messageParts = messageContent.getJSONArray("parts"); + int numParts = messageParts.length(); + for (int j = 0; j < numParts; j++) { + // I think we can just collate directly for this content type, but I'm not + // convinced it is the right behavior + content.append(messageParts.getString(j)); + } + } + case "reasoning_recap": + case "thoughts": + // Consider tying these to their associated message + // GPT metadata, worth recording, but not exposing + // Notably, `thoughts` seems like it might vary in form depending + // on the model or time of use + case "code": + // This contains code, and will likely need post-processing + // There is the 'language' tag for ID'ing languages + case "execution_output": + // This is also tied to code + default: + // Unknown content type + } + + // NOTE: Given some of the stuff, we probably need a fuller abstraction for this. + // The current one is Conversation -> Pairs of User/Assistant message + // However, we probably want to use distinct types for those so that we can properly + // associate all of the provided metadata to it + } + + return dbConversation; + } +}
\ No newline at end of file diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/SaveConversationTask.java b/firmal/src/main/java/bjc/firmal/gptbrowser/SaveConversationTask.java new file mode 100644 index 0000000..5aabe50 --- /dev/null +++ b/firmal/src/main/java/bjc/firmal/gptbrowser/SaveConversationTask.java @@ -0,0 +1,127 @@ +package bjc.firmal.gptbrowser; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Future; + +import javax.swing.SwingWorker; + +import bjc.firmal.gptbrowser.msginfo.RawMessageDB; +import bjc.functypes.ClosableThrowFunction; +import bjc.utils.misc.NamedPreparedStatement; +import bjc.utils.misc.NamedPreparedStatement.Args; + +/** + * Worker task to save a conversation to the DB + */ +final class SaveConversationTask extends SwingWorker<Void, Integer> { + private GPTConversationDB conversation; + private ClosableThrowFunction<List<NamedPreparedStatement.Args>, CompletableFuture<List<Integer>>, SQLException> insertConvFunc; + private ClosableThrowFunction<List<NamedPreparedStatement.Args>, CompletableFuture<List<Integer>>, SQLException> insertMessageFunc; + + private String note; + private CountDownLatch msgCounter; + + /** + * Create a DB save worker task + * @param conv The conversation to save + * @param convUpdate The DB function for conversation updates + * @param msgUpdate The DB function for message updates + * @param msgCounter The message counter + */ + public SaveConversationTask(GPTConversationDB conv, + ClosableThrowFunction<List<NamedPreparedStatement.Args>, CompletableFuture<List<Integer>>, SQLException> convUpdate, + ClosableThrowFunction<List<NamedPreparedStatement.Args>, CompletableFuture<List<Integer>>, SQLException> msgUpdate, + CountDownLatch msgCounter) { + this.conversation = conv; + this.insertConvFunc = convUpdate; + this.insertMessageFunc = msgUpdate; + 
this.msgCounter = msgCounter; + } + + @Override + protected Void doInBackground() throws Exception { + List<NamedPreparedStatement.Args> insertConvArgs = new ArrayList<>(1); + + NamedPreparedStatement.Args.Builder insertConvRecord = NamedPreparedStatement.Args.builder(); + insertConvRecord.setString("id", conversation.getID()); + insertConvRecord.setString("title", conversation.getTitle()); + insertConvArgs.add(insertConvRecord.build()); + + List<RawMessageDB> messages = conversation.getRawMessages(); + List<NamedPreparedStatement.Args> insertMessageArgs = new ArrayList<>(messages.size()); + + int totalNumMessages = messages.size(); + int currNumMessages = 0; + + for (RawMessageDB message : messages) { + NamedPreparedStatement.Args.Builder insertMessageRecord = NamedPreparedStatement.Args.builder(); + + if (isCancelled()) break; + + int currProgress = Math.min(100, (currNumMessages / totalNumMessages) * 100); + setProgress(currProgress); + + String parentID = message.getParentMessageID(); + if (conversation.hasSeenRawMessage(parentID)) { + String newNote = "Saving message " + currNumMessages + " of " + totalNumMessages; + firePropertyChange("note", note, newNote); + note = newNote; + + + insertMessageRecord.setString("selfid", message.getMessageID()); + insertMessageRecord.setString("convid", message.getConversationID()); + insertMessageRecord.setString("body", message.getMessageBody()); + // TODO figure out why we are getting constraint violations here. + // Do we really need to leave this null initially, then backfill it? + // Or do we need to be doing these as independent DB queries instead of batching them? 
+ // That sounds rather inefficient, but so is doing a second pass to fill it later + // insertMessage.setString("parentid", message.getParentMessageID()); + insertMessageArgs.add(insertMessageRecord.build()); + } else { + // TODO: this message has a missing/incorrect parent link + } + } + + String newNote = "Starting save of " + GPTJSONBrowserFrame.BATCH_THRESHOLD + " messages/conversations to the DB"; + firePropertyChange("note", note, newNote); + note = newNote; + + // Save our changes for this conversation + Future<List<Integer>> insertConversationResults = insertConvFunc.apply(insertConvArgs); + + for (int i : insertConversationResults.get()) { + if (i != 0 && i != 1) { + // TODO: do something about an oddity + } + } + + Future<List<Integer>> insertMessageResults = insertMessageFunc.apply(insertMessageArgs); + for (int i : insertMessageResults.get()) { + if (i != 0 && i != 1) { + // TODO handle oddities + } + } + + newNote = "Saved " + GPTJSONBrowserFrame.BATCH_THRESHOLD + " messages/conversations to the DB"; + firePropertyChange("note", note, newNote); + + + return null; + } + + @Override + protected void done() { + msgCounter.countDown(); + } + + public String getNote() { + return note; + } + public void setNote(String note) { + this.note = note; + } +}
\ No newline at end of file
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/SaveConversationToDBListener.java b/firmal/src/main/java/bjc/firmal/gptbrowser/SaveConversationToDBListener.java
new file mode 100644
index 0000000..b0092f6
--- /dev/null
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/SaveConversationToDBListener.java
@@ -0,0 +1,147 @@
+package bjc.firmal.gptbrowser;
+
+import java.awt.BorderLayout;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.sql.SQLException;
+import java.util.Iterator;
+import java.util.concurrent.CountDownLatch;
+
+import javax.swing.JButton;
+import javax.swing.JDesktopPane;
+import javax.swing.JDialog;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+
+import bjc.firmal.Firmal;
+import bjc.utils.gui.panels.BatchTaskProgressPanel.BatchHandle;
+import bjc.utils.misc.NamedPreparedStatement;
+
+/**
+ * Action listener that saves every conversation in the browser frame's
+ * conversation list to the database, using one {@link SaveConversationTask}
+ * per conversation.
+ */
+final class SaveConversationToDBListener implements ActionListener {
+	/** The frame whose conversationListModel supplies the conversations to save. */
+	private final GPTJSONBrowserFrame browserFrame;
+	// NOTE(review): assigned in the constructor but not read anywhere in this class.
+	private final JDesktopPane deskPane;
+
+	/**
+	 * Create a new listener.
+	 *
+	 * @param gptjsonBrowserFrame The frame to read conversations from
+	 * @param deskPane A desktop pane, stored in a field
+	 */
+	SaveConversationToDBListener(GPTJSONBrowserFrame gptjsonBrowserFrame, JDesktopPane deskPane) {
+		browserFrame = gptjsonBrowserFrame;
+		this.deskPane = deskPane;
+	}
+
+	/**
+	 * Create the two insert statements, start one save task per conversation, and
+	 * start a thread that closes the statements once every task has finished.
+	 *
+	 * @param aev The triggering event (not inspected)
+	 */
+	@Override
+	public void actionPerformed(ActionEvent aev) {
+		Firmal fm = Firmal.fm;
+
+		// Placeholder args naming each statement's parameters; the values are null
+		// here, and SaveConversationTask sets the same keys per record.
+		NamedPreparedStatement.Args.Builder insertConvShape = NamedPreparedStatement.Args.builder();
+		insertConvShape.setString("id", null);
+		insertConvShape.setString("title", null);
+
+		NamedPreparedStatement.Args.Builder insertMessageShape = NamedPreparedStatement.Args.builder();
+		insertMessageShape.setString("selfid", null);
+		insertMessageShape.setString("convid", null);
+		insertMessageShape.setString("body", null);
+
+		try {
+			var insertConversation = fm.createQueuedUpdater(
+					"insert into chatgpt.conversations (conversation_id, conversation_title)"
+					+ " values (:id::uuid, :title) on conflict (conversation_id) do nothing", insertConvShape.build());
+			var insertMessage = fm.createQueuedUpdater(
+					"insert into chatgpt.raw_messages (message_id, conversation_id, message_body) "
+					+ "values (:selfid::uuid, :convid::uuid, :body::json)"
+					+ " on conflict (message_id) do nothing", insertMessageShape.build());
+
+			Iterator<GPTConversationDB> conversations = browserFrame.conversationListModel.elements().asIterator();
+
+			BatchHandle saveBatch = fm.createTaskBatch("Save Raw ChatGPT Conversations to DB");
+
+			int totalConversations = browserFrame.conversationListModel.getSize();
+			int currConversation = 0;
+
+			// Each task counts this latch down from done(), so it reaches zero once
+			// every conversation's task has finished.
+			CountDownLatch msgCounter = new CountDownLatch(totalConversations);
+
+			while (conversations.hasNext()) {
+				currConversation++;
+
+				GPTConversationDB conversation = conversations.next();
+
+				SaveConversationTask saveTask = new SaveConversationTask(conversation, insertConversation, insertMessage, msgCounter);
+				String taskDesc = "Saving conversation " + currConversation + " of " + totalConversations + ": " + conversation.getTitle();
+				saveBatch.monitorSwingWorker(saveTask, taskDesc, true);
+
+				saveTask.execute();
+			}
+
+			// Make sure our statements are cleaned up once we are done
+			Thread cleanupThread = new Thread(() -> {
+				try {
+					msgCounter.await();
+				} catch (InterruptedException e) {
+					// Tasks may still be using the statements, so leave them open;
+					// restore the interrupt flag rather than swallowing it.
+					Thread.currentThread().interrupt();
+					e.printStackTrace();
+					return;
+				}
+
+				// Close each statement on its own, so a failure closing one
+				// cannot leave the other open.
+				try {
+					insertMessage.close();
+				} catch (Exception e) {
+					e.printStackTrace();
+				}
+
+				try {
+					insertConversation.close();
+				} catch (Exception e) {
+					e.printStackTrace();
+				}
+			});
+			cleanupThread.start();
+		} catch (SQLException sqlex) {
+			// The dialog is created with a null owner frame
+			JFrame mainFrame = null;
+			JDialog errorDialog = new JDialog(mainFrame, "Error interfacing with DB");
+
+			JLabel headerLabel = new JLabel("Error interfacing with database");
+
+			JLabel errorDetails = new JLabel(sqlex.getLocalizedMessage());
+
+			JButton okButton = new JButton("OK");
+			okButton.addActionListener((aev2) -> {
+				errorDialog.dispose();
+			});
+
+			errorDialog.add(BorderLayout.PAGE_START, headerLabel);
+			errorDialog.add(BorderLayout.PAGE_END, okButton);
+			errorDialog.add(BorderLayout.CENTER, errorDetails);
+
+			errorDialog.pack();
+			errorDialog.setVisible(true);
+		} catch (Exception e1) {
+			// NOTE(review): failures other than SQLException are only printed; no dialog is shown.
+			e1.printStackTrace();
+		}
+	}
+}
\ No newline at end of file
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/AIMessage.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/AIMessage.java
new file mode 100644
index 0000000..47bdb64
--- /dev/null
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/AIMessage.java
@@ -0,0 +1,27 @@
+package bjc.firmal.gptbrowser.msginfo;
+
+/**
+ * Interface for representing an AI message.
+ *
+ * @author bjculkin
+ */
+public interface AIMessage {
+	/**
+	 * Get the ID of this message
+	 *
+	 * @return The ID of this message
+	 */
+	public String getMessageID();
+	/**
+	 * Get the ID of the conversation this message is in
+	 *
+	 * @return The ID of the conversation for this message
+	 */
+	public String getConversationID();
+	/**
+	 * Get the ID of the previous message in the conversation this message is in.
+	 *
+	 * @return The ID for the previous message in the conversation, or null if there isn't one
+	 */
+	public String getParentMessageID();
+}
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/GPTMessage.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/GPTMessage.java
index db35156..d7af841 100644
--- a/firmal/src/main/java/bjc/firmal/gptbrowser/GPTMessage.java
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/GPTMessage.java
@@ -1,4 +1,4 @@
-package bjc.firmal.gptbrowser;
+package bjc.firmal.gptbrowser.msginfo;
 
 public class GPTMessage {
 	public static enum MessageAuthor {
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedCompoundMessagePart.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedCompoundMessagePart.java
new file mode 100644
index 0000000..68ad73f
--- /dev/null
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedCompoundMessagePart.java
@@ -0,0 +1,60 @@
+package bjc.firmal.gptbrowser.msginfo;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A compound message part: a part that holds a list of other message parts.
+ */
+public class ParsedCompoundMessagePart implements ParsedMessagePart {
+	private List<ParsedMessagePart> parts;
+
+	@Override
+	public MessagePartType getType() {
+		return MessagePartType.COMPOUND;
+	}
+
+	/**
+	 * Create a new compound message part
+	 *
+	 * @param parts The parts to go into the compound part; they are copied into a new list in the order given
+	 */
+	public ParsedCompoundMessagePart(ParsedMessagePart... parts) {
+		this.parts = new ArrayList<>(parts.length);
+
+		for (ParsedMessagePart part : parts) {
+			this.parts.add(part);
+		}
+	}
+
+	/**
+	 * Get the parts from this compound message (the backing list itself, not a copy)
+	 * @return The parts for this compound message
+	 */
+	public List<ParsedMessagePart> getParts() {
+		return parts;
+	}
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(parts);
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		ParsedCompoundMessagePart other = (ParsedCompoundMessagePart) obj;
+		return Objects.equals(parts, other.parts);
+	}
+
+	@Override
+	public String toString() {
+		return "ParsedCompoundMessagePart [parts=" + parts + "]";
+	}
+}
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedMessage.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedMessage.java
new file mode 100644
index 0000000..f8d5a6a
--- /dev/null
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedMessage.java
@@ -0,0 +1,35 @@
+package bjc.firmal.gptbrowser.msginfo;
+
+import java.util.List;
+
+/**
+ * A parsed AI message
+ */
+public class ParsedMessage implements AIMessage {
+	// Basic message info. NOTE(review): this class declares no constructor or setters, so these fields are never assigned here.
+	private String messageID;
+	private String conversationID;
+	private String parentMessageID;
+
+	// Parsed message info - some of these should get enum'd
+	private String author;
+	private String contentType;
+
+	private List<ParsedMessagePart> messageParts;
+
+	@Override
+	public String getMessageID() {
+		return messageID;
+	}
+
+	@Override
+	public String getConversationID() {
+		return conversationID;
+	}
+
+	@Override
+	public String getParentMessageID() {
+		return parentMessageID;
+	}
+
+}
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedMessagePart.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedMessagePart.java
new file mode 100644
index 0000000..38a02d4
--- /dev/null
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedMessagePart.java
@@ -0,0 +1,54 @@
+package bjc.firmal.gptbrowser.msginfo;
+
+/**
+ * A representation of the parts of a parsed message
+ */
+public interface ParsedMessagePart {
+	/**
+	 * The type of a parsed message part.
+	 *
+	 * This is what {@link ParsedMessagePart#getType()} returns; each constant
+	 * names one kind of part.
+	 */
+	public static enum MessagePartType {
+		/**
+		 * A message part containing text
+		 */
+		TEXT,
+
+		/**
+		 * A recap of the reasoning for a message
+		 */
+		REASONING_RECAP,
+
+		/**
+		 * The thoughts behind a message.
+		 */
+		THOUGHTS,
+
+		/**
+		 * A message part consisting of code
+		 */
+		CODE,
+		/**
+		 * A message part consisting of the results of executing code
+		 */
+		EXECUTION_OUTPUT,
+
+		/**
+		 * A 'raw' message part, just containing JSON
+		 */
+		RAW,
+
+		/**
+		 * A message part containing other message parts
+		 */
+		COMPOUND
+	}
+
+	/**
+	 * Get the type of this message part
+	 * @return The type of this message part
+	 */
+	public MessagePartType getType();
+}
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedRawMessagePart.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedRawMessagePart.java
new file mode 100644
index 0000000..52cf859
--- /dev/null
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedRawMessagePart.java
@@ -0,0 +1,61 @@
+package bjc.firmal.gptbrowser.msginfo;
+
+import java.util.Objects;
+
+/**
+ * A 'raw' part for a parsed message
+ */
+public class ParsedRawMessagePart implements ParsedMessagePart {
+	private String rawContents;
+
+	@Override
+	public MessagePartType getType() {
+		return MessagePartType.RAW;
+	}
+
+	/**
+	 * Create a new 'raw' message part
+	 * @param rawContents The raw contents for the message part
+	 */
+	public ParsedRawMessagePart(String rawContents) {
+		this.rawContents = rawContents;
+	}
+
+	/**
+	 * Get the raw contents of the message part
+	 * @return The raw contents of the message part
+	 */
+	public String getRawContents() {
+		return rawContents;
+	}
+
+	/**
+	 * Set the raw contents of the message part
+	 * @param rawContents The raw contents of the message part
+	 */
+	public void setRawContents(String rawContents) {
+		this.rawContents = rawContents;
+	}
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(rawContents);
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		ParsedRawMessagePart other = (ParsedRawMessagePart) obj;
+		return Objects.equals(rawContents, other.rawContents);
+	}
+
+	@Override
+	public String toString() {
+		return "ParsedRawMessagePart [rawContents=" + rawContents + "]";
+	}
+}
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedTextMessagePart.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedTextMessagePart.java
new file mode 100644
index 0000000..586ac0a
--- /dev/null
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/ParsedTextMessagePart.java
@@ -0,0 +1,67 @@
+package bjc.firmal.gptbrowser.msginfo;
+
+import java.util.Objects;
+
+/**
+ * A message part containing normal text
+ */
+public class ParsedTextMessagePart implements ParsedMessagePart {
+	private String contents;
+
+	@Override
+	public MessagePartType getType() {
+		return MessagePartType.TEXT;
+	}
+
+	/**
+	 * Create a text message part
+	 * @param contents The contents for the text message part
+	 */
+	public ParsedTextMessagePart(String contents) {
+		super();
+
+		this.contents = contents;
+	}
+
+
+	/**
+	 * Get the text contents of the message part
+	 * @return The text contents of the message part
+	 */
+	public String getContents() {
+		return contents;
+	}
+
+	/**
+	 * Set the text contents of the message part
+	 * @param contents The text contents of the message part
+	 */
+	public void setContents(String contents) {
+		this.contents = contents;
+	}
+
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(contents);
+	}
+
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		ParsedTextMessagePart other = (ParsedTextMessagePart) obj;
+		return Objects.equals(contents, other.contents);
+	}
+
+
+	@Override
+	public String toString() {
+		return "ParsedTextMessagePart [contents=" + contents + "]";
+	}
+}
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/RawMessageDB.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/RawMessageDB.java
index dd31204..3bb3fd5 100644
--- a/firmal/src/main/java/bjc/firmal/gptbrowser/RawMessageDB.java
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/RawMessageDB.java
@@ -1,6 +1,13 @@
-package bjc.firmal.gptbrowser;
+package bjc.firmal.gptbrowser.msginfo;
 
-public class RawMessageDB {
+import java.util.Objects;
+
+/**
+ * A 'raw' representation of an AI message
+ *
+ * @author bjculkin
+ */
+public class RawMessageDB implements AIMessage {
 	private String messageID;
 	private String conversationID;
 	private String messageBody;
@@ -9,28 +16,41 @@ public class RawMessageDB {
 	public String getMessageID() {
 		return messageID;
 	}
+
 	public void setMessageID(String messageID) {
 		this.messageID = messageID;
 	}
+
 	public String getConversationID() {
 		return conversationID;
 	}
+
 	public void setConversationID(String conversationID) {
 		this.conversationID = conversationID;
 	}
+
 	public String getMessageBody() {
 		return messageBody;
 	}
+
 	public void setMessageBody(String messageBody) {
 		this.messageBody = messageBody;
 	}
+
 	public String getParentMessageID() {
 		return parentMessageID;
 	}
+
 	public void setParentMessageID(String parentMessageID) {
 		this.parentMessageID = parentMessageID;
 	}
+
+	public RawMessageDB(String messageID) {
+		super();
+		this.messageID = messageID;
+	}
+
 	
 	public RawMessageDB(String messageID, String conversationID, String messageBody, String parentMessageID) {
 		super();
 		this.messageID = messageID;
@@ -38,6 +58,22 @@ public class RawMessageDB {
 		this.messageBody = messageBody;
 		this.parentMessageID = parentMessageID;
 	}
-	
-	
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(conversationID, messageBody, messageID, parentMessageID);
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		RawMessageDB other = (RawMessageDB) obj;
+		return Objects.equals(conversationID, other.conversationID) && Objects.equals(messageBody, other.messageBody)
+				&& Objects.equals(messageID, other.messageID) && Objects.equals(parentMessageID, other.parentMessageID);
+	}
 }
diff --git a/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/package-info.java b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/package-info.java
new file mode 100644
index 0000000..c489dd8
--- /dev/null
+++ b/firmal/src/main/java/bjc/firmal/gptbrowser/msginfo/package-info.java
@@ -0,0 +1 @@
+package bjc.firmal.gptbrowser.msginfo;
\ No newline at end of file |
