API Reference

Intake

backfill(start, end, body_id)

Extract old documents and fill db with summaries

Parameters:
  • start (str) –

    start date in ISO format

  • end (str) –

    end date in ISO format

  • body_id (int) –

    numerical ID of the government body to filter for (in Windsor, the Town Council is 18)

Returns:
  • assignments( list ) –

    list of assignment data in the same format as get_rss() output.

Source code in wuiw/intake.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def backfill(start, end, body_id):
    """Collect assignments for agendas that were published in the past

    Queries the town AgendaCenter search page for one government body and date
    range and turns every agenda link found into an assignment dict. This
    function only builds and returns the list; it does not write to the db.

    Args:
        start (str): start date in ISO format
        end (str): end date in ISO format
        body_id (int): numerical ID of the government body to filter for (in Windsor, the Town Council is 18)

    Returns:
        assignments (list): list of assignment data in the same format as get_rss() output.
            Empty when the search request does not return HTTP 200.

    Raises:
        ValueError: when start or end is not a valid ISO date
    """
    base_url = "https://www.windsorct.gov/AgendaCenter/ViewFile/Agenda/_"
    start_date = datetime.date.fromisoformat(start)
    end_date = datetime.date.fromisoformat(end)
    # The search url carries the dates as MM/DD/YYYY.
    url = f"https://www.windsorct.gov/AgendaCenter/Search/?term=&CIDs={body_id},&startDate={start_date.month:02d}/{start_date.day:02d}/{start_date.year}&endDate={end_date.month:02d}/{end_date.day:02d}/{end_date.year}&dateRange=&dateSelector="
    # Timeout so a stalled server cannot hang the run indefinitely.
    response = requests.get(url, headers=HEADERS, timeout=30)
    # Record the outgoing request, as get_rss() does for the feed request.
    civic_log.record(datetime.datetime.now(), url, response.status_code)

    if response.status_code != 200:
        logger.error("Backfill request failed: %s", response.status_code)
        return []

    time.sleep(REQUEST_DELAY)
    soup = BeautifulSoup(response.text, 'html.parser')

    assignments = []
    for lead in soup.select('td p a[id]'):
        agenda_id = lead["id"]
        agenda_name = lead.get("name")
        if agenda_name is None:
            # One malformed link should not abort the whole backfill.
            logger.warning("bad entry: anchor %s has no name attribute", agenda_id)
            continue

        body = classify(lead.text, MUNICIPAL_BODIES)
        body = "_".join(body.lower().split())
        meeting_type = classify(lead.text, MEETING_TYPES)
        # presumably the anchor id starts with MMDDYYYY — verify against the live page
        month, day, year = agenda_id[0:2], agenda_id[2:4], agenda_id[4:8]

        assignments.append({
            "meeting_id": f"{body}_{agenda_name}_{year}",
            "meeting_type": meeting_type,
            "body": body,
            "published_date": f"{year}-{month}-{day}",
            "materials": f"{base_url}{agenda_id}?html=true"
        })

    return assignments

get_rss(rss_url)

Retrieves new meeting info from Town RSS feed

Parameters:
  • rss_url (str) –

    hyperlink to RSS feed

Raises:
  • Exception

    when HTTP response != 200

Returns:
  • new_entries( list ) –

    list of new entries, each entry is a flat dict with keys { "meeting_id": composite_id, "meeting_type": meeting_type, "body": body, "published_date": pub_date, "materials": url }

Source code in wuiw/intake.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def get_rss(rss_url):
    """Retrieves new meeting info from Town RSS feed

    Args:
        rss_url (str): hyperlink to RSS feed

    Raises:
        Exception: when the feed could not be fetched or the HTTP response is
            neither 200 nor 304

    Returns:
        new_entries (list): list of new entries (empty on HTTP 304), each entry is a flat dict with keys {
                "meeting_id": composite_id,
                "meeting_type": meeting_type,
                "body": body,
                "published_date": pub_date,
                "materials": url
                }
    """

    feed = feedparser.parse(rss_url, agent=USER_AGENT, modified=None)
    # feedparser only sets `status` when an HTTP response was received, so a
    # network failure must not turn into an AttributeError here.
    status = feed.get("status")
    civic_log.record(datetime.datetime.now(), rss_url, status)

    if status == 304:
        logger.info("STATUS: %s; No updates", status)
        # Same (empty list) type as the normal return value.
        return []

    if status != 200:
        # Exception() does not apply %-formatting, so build the message here.
        raise Exception(f"Feed error: {status}")

    # Parse the feed
    logger.info("STATUS: %s; Parsing new data", status)
    new_entries = []
    for entry in feed.entries:
        try:
            # The numeric meeting id is the second-to-last path segment of the entry id.
            meeting_id = entry["id"].split("/")[-2]
            title = entry["title"]
            body = classify(title, MUNICIPAL_BODIES)
            body = "_".join(body.lower().split())
            meeting_type = classify(title, MEETING_TYPES, meeting_type_fallback=True)
            year, month, day = entry["published_parsed"][:3]
            pub_date = datetime.date(year, month, day).isoformat()

            composite_id = f"{body}_{meeting_id}_{year}"
            url = (
                f"https://www.windsorct.gov/AgendaCenter/"
                f"ViewFile/Agenda/_{month:02d}{day:02d}{year}-{meeting_id}?html=true"
            )

            new_entries.append({
                "meeting_id": composite_id,
                "meeting_type": meeting_type,
                "body": body,
                "published_date": pub_date,
                "materials": url
                })

        except (KeyError, IndexError, TypeError, ValueError) as e:
            # Missing field, id without the expected path segments, or an
            # unparsable publish date: skip this entry, keep the rest.
            logger.warning("bad entry: %s", e)
            continue

    return new_entries

Editor

approve_article(meeting_id, reviewed=True)

Sets the reviewed boolean on an assignment (to True by default)

Source code in wuiw/editor.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
def approve_article(meeting_id, reviewed=True):
    """Set the reviewed boolean on an assignment

    Args:
        meeting_id (str): composite meeting id of the assignment to update
        reviewed (bool, optional): value written to assignments.reviewed. Defaults to True.

    Raises:
        Exception: any connection or query error is logged and re-raised
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        cur.execute("""UPDATE assignments SET reviewed = %s
                    WHERE meeting_id = %s""",
                    (reviewed, meeting_id))
        conn.commit()
    except Exception as e:
        # conn is still None when get_db_connection() itself failed; calling
        # rollback() on it would replace the real error with an AttributeError.
        if conn is not None:
            conn.rollback()
        logger.warning(f"{e}")
        raise
    finally:
        if cur: cur.close()
        if conn: conn.close()

assign()

Review db for assignments with status=STATUS_PENDING that have no article yet, mark each one as assigned, and return them

Returns:
  • assignments( list ) –

    list of pending assignments to be sent to reporter

Source code in wuiw/editor.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def assign():
    """Review db for pending assignments without an article and return them

    Every returned assignment is also moved to STATUS_ASSIGNED through
    update_status(). The returned rows are the ones read before that update,
    so their "status" field still holds the value selected from the db.

    Returns:
        assignments (list): list of pending assignments to be sent to reporter

    Raises:
        Exception: any connection or query error is re-raised
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        cur.execute(
            """SELECT assignments.meeting_id, meeting_type, materials, status
            FROM assignments
            LEFT JOIN articles ON assignments.meeting_id = articles.meeting_id
            WHERE assignments.status = 'pending' AND articles.meeting_id IS NULL;
            """
        )
        assignments = cur.fetchall()
        conn.commit()
    except Exception:
        # conn is still None when get_db_connection() itself failed; calling
        # rollback() on it would replace the real error with an AttributeError.
        if conn is not None:
            conn.rollback()
        raise
    finally:
        if cur: cur.close()
        if conn: conn.close()

    for assignment in assignments:
        update_status(assignment['meeting_id'], STATUS_ASSIGNED)

    return assignments

publish_article(meeting_id, published=True)

Sets the published boolean on an assignment (to True by default)

Source code in wuiw/editor.py
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
def publish_article(meeting_id, published=True):
    """Set the published boolean on an assignment

    Args:
        meeting_id (str): composite meeting id of the assignment to update
        published (bool, optional): value written to assignments.published. Defaults to True.

    Raises:
        Exception: any connection or query error is logged and re-raised
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        cur.execute("""UPDATE assignments SET published = %s
                    WHERE meeting_id = %s""",
                    (published, meeting_id))
        conn.commit()
    except Exception as e:
        # conn is still None when get_db_connection() itself failed; calling
        # rollback() on it would replace the real error with an AttributeError.
        if conn is not None:
            conn.rollback()
        logger.warning(f"{e}")
        raise
    finally:
        if cur: cur.close()
        if conn: conn.close()

report_error(meeting_id, error_text)

inserts a new row to error_reports

Parameters:
  • meeting_id (str) –

    composite meeting id

  • error_text (str) –

    error message from the POST '/report-error' route

Source code in wuiw/editor.py
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
def report_error(meeting_id, error_text):
    """Insert a new row into error_reports

    Args:
        meeting_id (str): composite meeting id
        error_text (str): error message from the POST '/report-error' route

    Raises:
        Exception: any connection or query error is logged and re-raised
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        cur.execute("""INSERT INTO error_reports (meeting_id, report_text) VALUES (%s, %s)""",
                    (meeting_id, error_text))
        conn.commit()
    except Exception as e:
        # conn is still None when get_db_connection() itself failed; calling
        # rollback() on it would replace the real error with an AttributeError.
        if conn is not None:
            conn.rollback()
        logger.warning(f"{e}")
        raise
    finally:
        if cur: cur.close()
        if conn: conn.close()

save_ai_log(logs)

save outgoing request logs to ai providers

Parameters:
  • logs (list) –

    list of tuples returned by ai_log.info

Source code in wuiw/editor.py
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def save_ai_log(logs):
    """Persist the logged AI provider requests to the ai_requests table

    Each row is inserted and committed on its own, so one bad row is rolled
    back and logged as a warning without losing the others.

    Args:
        logs (list): list of tuples returned by ai_log.info, ordered as
            (run_id, timestamp, provider, status, input_tokens, output_tokens)
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        for entry in logs:
            try:
                run_id, timestamp, provider, status, input_tokens, output_tokens = (
                    entry[0], entry[1], entry[2], entry[3], entry[4], entry[5]
                )
                cur.execute("""
                            INSERT INTO ai_requests (run_id, timestamp, provider, status, input_tokens, output_tokens)
                            VALUES (%s, %s, %s, %s, %s, %s)""",
                            (run_id, timestamp, provider, status, input_tokens, output_tokens))
                conn.commit()
            except Exception as exc:
                # Drop only this row and move on to the next one.
                conn.rollback()
                logger.warning(f"{exc}")
    finally:
        if cur:
            cur.close()
        if conn:
            conn.close()

save_articles(articles)

Receive articles from writer.write_article() and add them to the articles table

Parameters:
  • articles (list) –

    list of dicts containing article data ("headline", "summary", etc...)

Source code in wuiw/editor.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def save_articles(articles):
    """Receive articles from writer.write_article() and add them to the articles table

    An existing (meeting_id, doc_type) row has its summary and meeting_date
    overwritten. All articles are written in one transaction: either every
    row is committed or none is.

    Args:
        articles (list): list of dicts containing article data; the keys read
            here are "meeting_id", "meeting_date", "byline", "doc_type" and
            "summary" (stored as a JSON string)

    Raises:
        Exception: any connection or query error is re-raised after rollback
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        for article in articles:
            cur.execute(
                """
                INSERT INTO articles (meeting_id, meeting_date, byline, doc_type, summary)
                VALUES  (%s, %s, %s, %s, %s)
                ON CONFLICT (meeting_id, doc_type) DO UPDATE SET
                    summary = EXCLUDED.summary,
                    meeting_date = EXCLUDED.meeting_date
                """,
                (article['meeting_id'], article['meeting_date'], article['byline'], article["doc_type"], json.dumps(article['summary']))
            )
        conn.commit()
    except Exception:
        # conn is still None when get_db_connection() itself failed; calling
        # rollback() on it would replace the real error with an AttributeError.
        if conn is not None:
            conn.rollback()
        raise
    finally:
        if cur: cur.close()
        if conn: conn.close()

save_assignments(rss_assignments, run_id=None)

Add new assignments from intake.get_rss() or intake.backfill() to the assignments table

Parameters:
  • rss_assignments (list) –

    List of assignments produced by either intake.get_rss() or intake.backfill()

  • run_id (int, default: None ) –

    global run id associated with the cron run

Source code in wuiw/editor.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def save_assignments(rss_assignments, run_id=None):
    """Add new assignments from intake to the assignments table

    A meeting_id that already exists is only updated when its materials url
    changed; in that case the row is refreshed and its status is reset to
    'pending'. All rows are written in one transaction.

    Args:
        rss_assignments (list): List of assignments produced by either intake.get_rss() or intake.backfill()
        run_id (int, optional): global run id associated with the cron run. Defaults to None.

    Raises:
        Exception: any connection or query error is re-raised after rollback
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        for assignment in rss_assignments:
            cur.execute(
                """INSERT INTO assignments (meeting_id, meeting_type, body, published_date, materials, last_run_id)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (meeting_id) DO UPDATE SET
                    meeting_type=EXCLUDED.meeting_type,
                    materials=EXCLUDED.materials,
                    body=EXCLUDED.body,
                    published_date=EXCLUDED.published_date,
                    status='pending',
                    last_run_id=EXCLUDED.last_run_id
                WHERE assignments.materials != EXCLUDED.materials
                """, (assignment['meeting_id'], assignment['meeting_type'], assignment['body'], assignment['published_date'], assignment['materials'], run_id)
                )
        conn.commit()
    except Exception:
        # conn is still None when get_db_connection() itself failed; calling
        # rollback() on it would replace the real error with an AttributeError.
        if conn is not None:
            conn.rollback()
        raise
    finally:
        if cur: cur.close()
        if conn: conn.close()

save_civic_log(logs)

save outgoing request logs to town servers

Parameters:
  • logs (list) –

    list of tuples returned by civic_log.info

Source code in wuiw/editor.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
def save_civic_log(logs):
    """Persist the logged requests to town servers to the civic_requests table

    Each row is inserted and committed on its own, so one bad row is rolled
    back and logged as a warning without losing the others.

    Args:
        logs (list): list of tuples returned by civic_log.info, ordered as
            (run_id, timestamp, url, response_status)
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        for entry in logs:
            try:
                run_id, timestamp, url, response_status = (
                    entry[0], entry[1], entry[2], entry[3]
                )
                cur.execute("""
                            INSERT INTO civic_requests (run_id, timestamp, url, response_status)
                            VALUES (%s, %s, %s, %s)""",
                            (run_id, timestamp, url, response_status))
                conn.commit()
            except Exception as exc:
                # Drop only this row and move on to the next one.
                conn.rollback()
                logger.warning(f"{exc}")
    finally:
        if cur:
            cur.close()
        if conn:
            conn.close()

send_alert(error)

Email the developer when the wuiw.main routine fails

Parameters:
  • error (str) –

    error message

Source code in wuiw/editor.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def send_alert(error):
    """Email the developer when the wuiw.main routine fails

    Best effort: a missing configuration or a failed send is logged and never
    raised, so alerting cannot itself crash the caller.

    Args:
        error (str): error message
    """
    sender = os.getenv("ALERT_EMAIL")
    password = os.getenv("ALERT_EMAIL_PASSWORD")
    recipient = sender  # send to yourself

    if not sender or not password:
        # Without credentials the login below can only fail; skip the network
        # round trip and say what is actually wrong.
        logger.error("Failed to send alert: %s", "ALERT_EMAIL or ALERT_EMAIL_PASSWORD is not set")
        return

    msg = MIMEText(f"WUIW pipeline failed.\n\nError: {error}")
    msg["Subject"] = "WUIW Pipeline Failure"
    msg["From"] = sender
    msg["To"] = recipient

    try:
        # Timeout so an unreachable mail server cannot block the process forever.
        with smtplib.SMTP_SSL("smtp.gmail.com", 465, timeout=30) as server:
            server.login(sender, password)
            server.sendmail(sender, recipient, msg.as_string())
        logger.info("Failure alert sent")
    except Exception as e:
        logger.error("Failed to send alert: %s", e)

update_article(meeting_id, updates, resolved=False)

Saves edits, sets reviewed = True, and optionally resolves all open error reports. `updates` is a dict constructed by the POST route when updates are submitted.

Parameters:
  • meeting_id (str) –

    composite meeting id (example: town_council_1234_2025)

  • updates (dict) –

    constructed by the form in the POST '/admin/articles/<meeting_id>' route { "assignment": { "meeting_type": "Regular Meeting" }, "article": { "agenda": { "items": ["items"] }, "minutes": { "meeting_date": "2025-03-01", "headline": "headline", "bullets": ["bullets"], "blurb": "blurb" } } }

  • resolved (bool, default: False ) –

    Toggles resolved status. Defaults to False.

Source code in wuiw/editor.py
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
def update_article(meeting_id, updates, resolved=False):
    """Save edits, set reviewed = True, optionally resolve all error reports

    All statements run in one transaction: either every change is committed
    or none is.

    Args:
        meeting_id (str): composite meeting id (example: town_council_1234_2025)
        updates (dict): constructed by the form in the POST '/admin/articles/<meeting_id>' route
            {
            "assignment": {
                "meeting_type": "Regular Meeting"
            },
            "article": {
                "agenda": {
                    "items": ["items"]
                },
                "minutes": {
                    "meeting_date": "2025-03-01",
                    "headline": "headline",
                    "bullets": ["bullets"],
                    "blurb": "blurb"
                    }
                }
            }
        resolved (bool, optional): When True, marks every error report of this
            meeting as resolved. Defaults to False.

    Raises:
        Exception: any connection, query or payload error (for example a
            missing key in updates) is logged and re-raised after rollback
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        # update assignments.meeting_type
        cur.execute("""UPDATE assignments SET meeting_type = %s WHERE meeting_id = %s""",
                    (updates["assignment"]["meeting_type"], meeting_id))

        # update articles.meeting_date and articles.summary
        for doc_type, data in updates["article"].items():
            # Only minutes are persisted; "agenda", "voting_grid" and any
            # other doc type in the payload are currently ignored.
            if doc_type != "minutes":
                continue
            cur.execute("""UPDATE articles SET
                        summary = %s,
                        meeting_date = %s
                        WHERE meeting_id = %s AND doc_type = %s""",
                        (json.dumps(data), data["meeting_date"], meeting_id, doc_type))

        # update assignments.reviewed
        cur.execute("""UPDATE assignments SET reviewed = TRUE
                    WHERE meeting_id = %s""",
                    (meeting_id,))

        # update error_reports.resolved
        if resolved:
            cur.execute("""UPDATE error_reports SET resolved = TRUE
                        WHERE meeting_id = %s""",
                        (meeting_id,))

        conn.commit()
    except Exception as e:
        # conn is still None when get_db_connection() itself failed; calling
        # rollback() on it would replace the real error with an AttributeError.
        if conn is not None:
            conn.rollback()
        logger.warning(f"{e}")
        raise
    finally:
        if cur: cur.close()
        if conn: conn.close()

update_status(meeting_id, status, error_message=None)

Update status of an assignment in the database

Parameters:
  • meeting_id (str) –

    composite meeting id

  • status (str) –

    assignment status as prescribed in config

  • error_message (str, default: None ) –

    message with info about errors that occurred in this assignment upstream

Source code in wuiw/editor.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def update_status(meeting_id, status, error_message=None):
    """Update status of an assignment in the database

    Args:
        meeting_id (str): composite meeting id
        status (str): assignment status as prescribed in config
        error_message (str, optional): message with info about errors that occurred
            in this assignment upstream. When empty or None the stored
            error_message is cleared (set to NULL).

    Raises:
        Exception: any connection or query error is re-raised after rollback
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        cur = conn.cursor()
        # A None parameter is sent as NULL, so one statement covers both the
        # "set message" and the "clear message" case.
        cur.execute(
            "UPDATE assignments SET status = %s, error_message = %s WHERE meeting_id = %s",
            (status, error_message or None, meeting_id),
        )
        conn.commit()
    except Exception:
        # conn is still None when get_db_connection() itself failed; calling
        # rollback() on it would replace the real error with an AttributeError.
        if conn is not None:
            conn.rollback()
        raise
    finally:
        if cur: cur.close()
        if conn: conn.close()

Log

AIRequestLog

Module Level Singleton for logging requests to AI provider API

Source code in wuiw/log.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class AIRequestLog:
    """Module Level Singleton for logging requests to AI provider API

    Rows accumulate in ``info`` as tuples of
    (run_id, timestamp, provider, status, input_tokens, output_tokens).
    """
    def __init__(self):
        # No run is active until set_run_id() is called.
        self.info = []
        self.run_id = None

    def set_run_id(self, run_id):
        """Stamp every row recorded from now on with run_id."""
        self.run_id = run_id

    def record(self, timestamp, provider, status, input_tokens, output_tokens):
        """Append one request row carrying the current run_id."""
        row = (self.run_id, timestamp, provider, status, input_tokens, output_tokens)
        self.info.append(row)

    def reset(self):
        """Forget the run id and empty the row list in place (same list object)."""
        del self.info[:]
        self.run_id = None

CivicRequestLog

Module Level Singleton for logging HTTP requests to civic urls

Source code in wuiw/log.py
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
class CivicRequestLog:
    """Module Level Singleton for logging HTTP requests to civic urls

    Rows accumulate in ``info`` as tuples of (run_id, timestamp, url, status).
    """
    def __init__(self):
        # No run is active until set_run_id() is called.
        self.info = []
        self.run_id = None

    def set_run_id(self, run_id):
        """Stamp every row recorded from now on with run_id."""
        self.run_id = run_id

    def record(self, timestamp, url, status):
        """Append one request row carrying the current run_id."""
        row = (self.run_id, timestamp, url, status)
        self.info.append(row)

    def reset(self):
        """Forget the run id and empty the row list in place (same list object)."""
        del self.info[:]
        self.run_id = None

Reporter

fetch_documents(url, doc_type=None)

Use beautiful soup to parse html for urls to pdf(s)

Parameters:
  • url (str) –

    link to town documents

  • doc_type (str, default: None ) –

    specifies which type of docs to get

Returns:
  • documents( tuple ) –

    tuple with items (documents list, status, error message)

Source code in wuiw/reporter.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def fetch_documents(url, doc_type=None):
    """Use beautiful soup to parse html for urls to pdf(s) and transcribe them

    Args:
        url (str): link to town documents
        doc_type (str | list, optional): document type, or list of document
            types, to fetch. Defaults to None, which fetches every type
            detected on the page.

    Returns:
        documents (tuple): tuple with items (documents dict, status, error message).
            The dict maps each detected document type to its transcribed text.
            When the materials page does not return HTTP 200 the result is
            ({}, STATUS_FAILED, message); otherwise the status is
            STATUS_ASSIGNED and the message is None, even if no document
            could be downloaded.
    """
    # Timeout so a stalled town server cannot hang the run indefinitely.
    response = requests.get(url, headers=HEADERS, timeout=30)
    civic_log.record(datetime.now(), url, response.status_code)
    time.sleep(REQUEST_DELAY)

    if response.status_code != 200:
        logger.warning(f"materials url returned {response.status_code}")
        return ({}, STATUS_FAILED, f"materials get returned {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')
    items = soup.find_all('div', class_='item level1')
    logger.info("found %s documents to parse", len(items))

    # Map each detected document type to its url; a later item of the same
    # type replaces an earlier one.
    target_docs = {}
    for item in items:
        heading = item.find('h1', class_='title')
        link = item.find('a', href=True)
        if heading is None or link is None:
            # Unexpected markup in one item should not abort the whole fetch.
            logger.warning("skipping materials item without a title or link")
            continue
        detected_type = classify(heading.text.strip(), DOCUMENT_TYPES, doc_type_fallback=True)
        href = link['href']
        target_docs[detected_type] = href if href.startswith("http") else f"https://www.windsorct.gov{href}"

    if doc_type is None:
        keys = list(target_docs)
    elif isinstance(doc_type, (list, tuple, set)):
        keys = list(doc_type)
    else:
        keys = [doc_type]

    documents = {}
    for key in keys:
        if key not in target_docs:
            logger.warning(f"doc_type {key} not in materials")
            continue

        response_pdf = requests.get(target_docs[key], headers=HEADERS, timeout=30)
        civic_log.record(datetime.now(), target_docs[key], response_pdf.status_code)
        time.sleep(REQUEST_DELAY)

        if response_pdf.status_code != 200:
            logger.warning(f"No pdf returned for {key}; status: {response_pdf.status_code}")
            continue

        documents[key] = _transcribe_doc(io.BytesIO(response_pdf.content))

    return (documents, STATUS_ASSIGNED, None)

Journalist

AnthropicProvider

Class to instantiate Anthropic API client

Source code in wuiw/journalist.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
class AnthropicProvider:
    """Class to instantiate Anthropic API client
    """
    def __init__(self):
        self.client = Anthropic(api_key=ANTHROPIC_API_KEY)
        self.model = "claude-sonnet-4-6"
        # The first few-shot message supplies the system prompt; the
        # remaining messages are replayed ahead of every request.
        minutes_system = MINUTES_FEW_SHOTS[0]["content"]
        minutes_examples = MINUTES_FEW_SHOTS[1:]
        self.system = {"minutes": minutes_system}
        self.prompts = {"minutes": minutes_examples}

    def summarize(self, text, doc_type):
        """Build the prompt for doc_type and ask the model to summarize text.

        Args:
            text (str): body of text downloaded from government document
            doc_type (str): document type identifier (minutes, agenda, etc..). Selects the prompt.

        Raises:
            ValueError: When an invalid doc_type is passed

        Returns:
            response (tuple): (response_data, API Status, N_input_tokens, N_output_tokens);
                (None, "FAIL", None, None) when the request or the JSON decoding fails
        """
        if doc_type not in self.prompts:
            raise ValueError(f"Unknown doc_type: {doc_type}")

        # Few-shot examples first, the document to summarize as the last user turn.
        messages = self.prompts[doc_type] + [{"role": "user", "content": text}]

        try:
            request = {
                "model": self.model,
                "max_tokens": 1024,
                "system": self.system[doc_type],
                "messages": messages,
            }
            reply = self.client.messages.create(**request)

            # The reply text is decoded as JSON.
            parsed = json.loads(reply.content[0].text)
            usage = reply.usage
            return (parsed, "OK", usage.input_tokens, usage.output_tokens)
        except Exception as exc:
            logger.warning(f"AI client failed: {exc}")
            return (None, "FAIL", None, None)

summarize(text, doc_type)

Constructs a prompt and summarizes assigned text.

Parameters:
  • text (str) –

    body of text downloaded from government document

  • doc_type (str) –

    document type identifier (minutes, agenda, etc..). For constructing prompt.

Raises:
  • ValueError

    When an invalid doc_type is passed

Returns:
  • response( tuple ) –

    (response_data, API Status, N_input_tokens, N_output_tokens)

Source code in wuiw/journalist.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def summarize(self, text, doc_type):
    """Build the prompt for doc_type and ask the model to summarize text.

    Args:
        text (str): body of text downloaded from government document
        doc_type (str): document type identifier (minutes, agenda, etc..). Selects the prompt.

    Raises:
        ValueError: When an invalid doc_type is passed

    Returns:
        response (tuple): (response_data, API Status, N_input_tokens, N_output_tokens);
            (None, "FAIL", None, None) when the request or the JSON decoding fails
    """
    if doc_type not in self.prompts:
        raise ValueError(f"Unknown doc_type: {doc_type}")

    # Few-shot examples first, the document to summarize as the last user turn.
    messages = self.prompts[doc_type] + [{"role": "user", "content": text}]

    try:
        request = {
            "model": self.model,
            "max_tokens": 1024,
            "system": self.system[doc_type],
            "messages": messages,
        }
        reply = self.client.messages.create(**request)

        # The reply text is decoded as JSON.
        parsed = json.loads(reply.content[0].text)
        usage = reply.usage
        return (parsed, "OK", usage.input_tokens, usage.output_tokens)
    except Exception as exc:
        logger.warning(f"AI client failed: {exc}")
        return (None, "FAIL", None, None)

Writer

review_article(draft)

Run checks on data coming back from AI provider

Parameters:
  • draft (dict) –

    article information created by writer.write_article()

Source code in wuiw/writer.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def review_article(draft):
    """Run checks on data coming back from AI provider

    Args:
        draft (dict): article information returned by the provider's summarize()

    Returns:
        tuple: (draft, STATUS_COMPLETE, None) when every check passes,
            otherwise (None, STATUS_FAILED, error message)
    """
    # check returned object is dictionary
    if not isinstance(draft, dict):
        return None, STATUS_FAILED, "draft is not a dict"

    # check all keys exist
    missing = REQUIRED_KEYS - draft.keys()
    if missing:
        return None, STATUS_FAILED, f"missing keys: {missing}"

    # check date format validation
    meeting_date = draft.get("meeting_date")
    try:
        datetime.fromisoformat(meeting_date)
    except (TypeError, ValueError):
        # TypeError covers a non-string value such as a JSON null, which
        # would otherwise escape as an unhandled exception.
        logger.warning("date value: %s is not ISO format", meeting_date)
        return None, STATUS_FAILED, f"date: {meeting_date} not in format YYYY-MM-DD"

    # check bullets is a list etc
    if not isinstance(draft.get("bullets"), list):
        return None, STATUS_FAILED, "bullets is not a list"

    return draft, STATUS_COMPLETE, None

write_article(meeting_id, text, doc_type)

Send document text to journalist for summarization.

Parameters:
  • meeting_id (str) –

    composite meeting id

  • text (str) –

    document text

  • doc_type (str) –

    document type for prompt construction

Returns:
  • article( tuple ) –

    (article dictionary item, status, error message)

Source code in wuiw/writer.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def write_article(meeting_id, text, doc_type):
    """Send document text to journalist for summarization.

    Every attempt is recorded in ai_log, including failed ones.

    Args:
        meeting_id (str): composite meeting id
        text (str): document text
        doc_type (str): document type for prompt construction

    Returns:
        article (tuple): (article dictionary item, status, error message);
            (None, STATUS_FAILED, message) when summarizing or review fails
    """
    # Bound before the try so the except branch can run even when
    # get_provider() itself is what failed.
    provider = None
    try:
        provider = get_provider()
        draft, client_status, input_tokens, output_tokens = provider.summarize(text, doc_type)
        ai_log.record(datetime.now(), provider.model, client_status, input_tokens, output_tokens)
    except Exception as e:
        model = getattr(provider, "model", "unknown")
        ai_log.record(datetime.now(), model, "FAIL", None, None)
        return None, STATUS_FAILED, f"summarize failed: {e}"

    article, status, error = review_article(draft)

    if status == STATUS_FAILED:
        return None, STATUS_FAILED, error

    return {
        "meeting_id": meeting_id,
        "meeting_date": article.get("meeting_date"),
        "byline": provider.model,
        "doc_type": doc_type,
        "summary": article
    }, STATUS_COMPLETE, None