27th April 2020

SQL Datamodel For J-Pilot

Currently J-Pilot stores its data in binary form which is compatible with the original Palm data format. See Palm File Format Specification. Reading these binary formats is not simple, see for example pdbrd.c. Portion of the internal structure looks something like this:

typedef struct {        // header of record list
        LocalID nextRecordListID;
        UInt16 numRecords;
        UInt16 firstEntry;
} RecordListType;

typedef struct {        // single element of record list
        LocalID localChunkID;   // offset, start of actual data
        UInt8 attributes;       // 1st nibble: ?/private, 2nd: this is the category
        UInt8 uniqueID[3];
} RecordEntryType;

typedef struct {
        UInt16 renamedCategories;
        char categoryLabels[16][16];
        UInt8 categoryUniqIDs[16];
        UInt16 lastUniqID_pad;  //UInt8 lastUniqID; UInt8 padding;
} AppInfoType;

In 2013 I proposed to use SQLite as internal data format instead, see Possible Enhancements to J-Pilot.

Below datamodel closely follows the field declaration in J-Pilot and Pilot-Link, e.g., /usr/include/pi-datebook.h or utils.h.

create table AddrLabel (
    Id            int primary key,
    Label         text

-- Labels for address categories, like 'Business', 'Travel', etc.
create table AddrCategory (
    Id            int primary key,
    Label         text

-- Labels for address phone entries, like 'Work', 'Mobile', etc.
create table PhoneLabel (
    Id            int primary key,
    Label         text

-- Actual address information
create table Addr (
    Id            int primary key, -- unique_ID
    Category      int default(0),
    Private       int default(0),  -- boolean, zero or one
    showPhone     int default(1),  -- which of phone1...5 to show as default
    Lastname      text,
    Firstname     text,
    Title         text,
    Company       text,
    Note          text,
    foreign key (Category) references AddrCategory(Id),
    foreign key (PhoneLabel1) references PhoneLabel(Id),
    foreign key (PhoneLabel2) references PhoneLabel(Id),
    foreign key (PhoneLabel3) references PhoneLabel(Id),
    foreign key (PhoneLabel4) references PhoneLabel(Id),
    foreign key (PhoneLabel5) references PhoneLabel(Id)

create table Datebook (
    Id            int primary key,
    Private       int default(0),  -- boolean, zero or one
    Timeless      int default(0),  -- boolean, zero or one
    Begin         text,            -- begin date in format YYYY-MM-DDTHH:MM
    End           text,            -- end date in format YYYY-MM-DDTHH:MM
    Alarm         int,             -- boolean, zero or one
    Description   text,
    Note          text

-- Labels for ToDo categories, like 'Business', 'Personal', etc.
create table ToDoCategory (
    Id            int primary key,
    Label         text

create table ToDo (
    Id            int primary key,
    Category      int default(0),
    Private       int default(0),  -- boolean, zero or one
    Indefinite    int default(0),  -- boolean, zero or one
    Due           text,            -- due date in format YYYY-MM-DD
    Priority      int default(1),
    Complete      int,             -- boolean, zero or one
    Description   text,
    Note          text,
    foreign key (Category) references ToDoCategory(Id)

-- Labels for memo categories, like 'Business', 'Personal', etc.
create table MemoCategory (
    Id            int primary key,
    Label         text

create table Memo (
    Id            int primary key,
    Category      int default(0),
    Private       int default(0),  -- boolean, zero or one
    Text          text,
    foreign key (Category) references MemoCategory(Id)

-- Labels for expense categories, like 'Project A', 'Internal', etc.
create table ExpenseCategory (
    Id            int primary key,
    Label         text

-- Labels for expense types, like 'airfaire', 'car rental', etc.
create table ExpenseType (
    Id            int primary key,
    Label         text
-- Taken from /usr/include/pi-expense.h
insert into ExpenseType (Id,Label) values (0,'Airfare');
insert into ExpenseType (Id,Label) values (1,'Breakfast');
insert into ExpenseType (Id,Label) values (27,'Train');

-- Labels for expense payments, like 'cash', 'Visa', etc.
create table ExpensePayment (
    Id            int primary key,
    Label         text
-- Taken from /usr/include/pi-expense.h
insert into ExpensePayment (Id,Label) values (1,'AmEx');
insert into ExpensePayment (Id,Label) values (2,'Cash');
insert into ExpensePayment (Id,Label) values (7,'Visa');
insert into ExpensePayment (Id,Label) values (8,'Unfiled');

-- Labels for expense currency, like 'US', 'Germany', etc.
create table ExpenseCurrency (
    Id            int primary key,
    Label         text
-- Taken from Expense/expense.c
insert into ExpenseCurrency (Id,Label) values (0,'Australia');
insert into ExpenseCurrency (Id,Label) values (1,'Austria');
insert into ExpenseCurrency (Id,Label) values (2,'Belgium');
insert into ExpenseCurrency (Id,Label) values (23,'United States');

create table Expense (
    Id            int primary key,
    Category      int default(0),
    Date          text,            -- date in format YYYY-MM-DD
    Type          int,             -- 0=airfare, 1=breakfast, etc.
    Payment       int,             -- 0=AmEx, 1=Cash, etc.
    Currency      int,
    Amount        text,
    Vendor        text,
    City          text,
    Attendees     text,
    Note          text,
    foreign key (Category) references ExpenseCategory(Id),
    foreign key (Type) references ExpenseType(Id),
    foreign key (Payment) references ExpensePayment(Id),
    foreign key (Currency) references ExpenseCurrency(Id)

It has three advantages A1, A2, and A3 to have the format in an SQL schema. Although, there is one disadvantage D1.

A1. It is very easy to have more than 16 categories. Currently the binary format limits the number of categories to 16 as this number is stored in one nibble. It is also very easy to add more fields, regardless whether they are shown on screen or not.

A2. Having the data format stored in SQLite makes it easy to search in ones own data. For example, searching for specific datebook entries:

sqlite3 $HOME/.jpilot/jptables.db "select Begin, Description from Datebook where Description like '%$1%' order by Begin"

So we can use the command line to query our J-Pilot data. As illustration, using above query with string "jpsqlite" would result in:

2020-04-17T10:00|jpsqlite.c: Lege SQLite3 Tabellen mittels jptables.sql an
2020-04-17T14:00|jpsqlite.c: Schreibe in plugin_pre_sync_pre_connect() in die beiden Tabellen AddrCategory + Addr
2020-04-18T10:00|jpsqlite.c: Schreibe in Tabelle PhoneLabel, füge phoneLabel[0..7] in Addr hinzu

Similarly, searching in J-Pilot Memos from the command line:

sqlite3 $HOME/.jpilot/jptables.db "select Text from Memo where Text like '%$1%'" | grep -iC3 $1

Using something similar with the original J-Pilot binary format would involve using strings and grep command.

We are not tied to the command line. Equally well we can use a graphical visualization of our database, for example, using DB Browser for SQLite. An Arch Linux package is here. A screenshot with above schema is above.

CSV export is also easy, as this is a standard export format in SQLite (.mode csv). Also cleansing data, where there is a common error in multiple data, is easy. This SQLite internal format can then be the base for syncing multiple devices, for example a PC, a smartphone and a tablet. This can be accomplished as described below.

D1. In contrast to the internal binary format, the SQLite format is expected to consume more storage. The binary format keeps data packed together. In our case we observed that SQLite needs roughly two times the storage: The internal format used roughly 3 MB, SQLite used roughly 6 MB.

A3. Once the internal data structure in J-Pilot uses SQLite, it becomes easier to implement a "multi-sync" feature, i.e., hot-syncing data with multiple devices. In particular, synchronisation between two devices can now be done in an SQL transaction:

    ...    -- sync data

For those unfamiliar with the wording in Palm Pilot speech: HotSyncing is the process, where the data from your PC is synchronized with the data on your PalmPilot. It works both ways, i.e., changes on the PC are transfered to the Palm Pilot, and vice versa, changes on the Palm Pilot are copied back to the PC. There is also a form of collision handling, if both, PC and Palm Pilot, have changed the same record.

1. Assumptions. Assume we have three devices, i.e., PC P, smartphone S1, smartphone S2, e.g., S2 could be a tablet. Only P serves as HotSync server, i.e., S1 syncs to P, and S2 syncs to P. But S1 does not sync to S2.

2. Sync table. Data structure on each device, i.e., what table data needs to be stored on each device. For each change (insert, update, delete) in any entity (Datebook, Addr, etc.) we need to store information in another Sync table:

  1. t: Timestamp of change
  2. e: Entity, i.e., address, datebook, memo, etc.
  3. Id: Id of entity table
  4. C: Change-Type (insert, update, delete)
  5. B: Bitfield, where each participating device is crossed out, P=0x01, S1=0x02, S2=0x04
  6. D: concatenated entry of previous record. For example: for a deleted record the record to be deleted itself will be stored. For an updated record, the record prior to its update is stored.

3. Collision Cases. Discuss the various scenarios where data in the devices differ and what to do in each case.

  1. P has new data (C=insert), which S1 does not have, or vice versa: Copy from P to S1, or vice versa.
  2. P has deleted a record that S1 still has untouched, or vice versa: Delete on S1 as well, or delete on P.
  3. P has modified data (C=update), which S1 has also modified (C=update): Use timestamp t to decide which side overrules the other. If S1 is the winner, then store the change in Sync table on P as well, so further hot-syncs with S2 can pick up the changes.
  4. P has new data (C=insert) with Id=x, and S1 has also new data with identical Id=x: Keep x on S1, and update Id=x to Id=y on P, i.e., change Id in underlying entity table and Sync table. Usually y=x+1. Rest of processing proceeds as in first case.

If bitfield B contains all participating devices, i.e., B=0x07, then the record in Sync table can be deleted as it is no longer required.

4. Id-Management. The Id column in above tables is just a consecutive number which is taken from the underlying entity table, for example it is Addr.Id. This is similar to the current Id-Management in J-Pilot. For this whole synchronization to work, P, S1, and S2 must be initialized with data having common Id's. They do not necessarily have to be numeric, as in our case. But having them numeric is probably very simple.

The export formats for Datebook, Address, and To-Do's allow and already use UID. J-Pilot encapsulates the Id into this UID field. Memo, and Expense do not have these IETF RFC approved formats. For example, address export as vcard contains the Id in hexadecimal form using J-Pilot export.

PRODID:-//Judd Montgomery//NONSGML J-Pilot 1.8.2//EN
FN:Palm III Accessories
N:Palm III Accessories;
ORG:3Com Corporation

5. Optimizations. If data in Sync table contains a newly created record (C=insert), which is then deleted (C=delete) and bitfield B in both cases is equal to the device at hand, then these two records can be deleted from the Sync table. For example, if on P the value of B=P, then these two records including all records in between referring to the same Id, can be deleted in Sync table.

As from above description it follows that D is not strictly required.

6. ER diagram. The above tables can be visualized in below ER diagram.

erDiagram AddrLabel { int Id string Label } AddrCategory { int Id string Label } PhoneLabel { int Id string Label } AddrCategory ||--o{ Addr : "FK: Category" PhoneLabel ||--o{ Addr : "FK: PhoneLabel1-5" Addr { int Id int Category int Private int showPhone string Lastname string Firstname string Title string Company string PhoneLabel1-5 string Phone1-5 string Address string City string State string Zip string Country string Custom1-4 string Note }
erDiagram Datebook { int Id int Private int Timeless string Begin string End int Alarm int Advance int AdvanceUnit int RepeatType int RepeatForever string RepeatEnd int RepeatFreq int RepeatDay int RepeatDaySu int RepeatDayMo int RepeatDayTu int RepeatDayWe int RepeatDayTh int RepeatDayFr int RepeatDaySa string Description string Note } ToDoCategory { int Id string Label } ToDoCategory ||--o{ ToDo : "FK: Category" ToDo { int Id int Category int Private int Indefinite string Due int Priority int Complete string Description string Note } MemoCategory { int Id string Label } MemoCategory ||--o{ Memo : "FK: Category" Memo { int Id int Category int Private string Text }
erDiagram ExpenseCategory { int Id string Label } ExpenseType { int Id string Label } ExpensePayment { int Id string Label } ExpenseCurrency { int Id string Label } ExpenseCategory ||--o{ Expense : "FK: Category" ExpenseType ||--o{ Expense : "FK: Type" ExpensePayment ||--o{ Expense : "FK: Payment" ExpenseCurrency ||--o{ Expense : "FK: Currency" Expense { int Id int Category string Date int Type int Payment int Currency string Amount string Vendor string City string Attendees string Note }

Some tables just contain constants.

0Last name
1First name
11Zip Code
15Custom 2
17Custom 4
133EU (Euro)
9Hong Kong
22United Kingdom
23United States
4Credit Card
3Business Meals
4Car Rental

The two database tables marked with (*) are not initially loaded with INSERT statements as the other database tables in above table.