diff options
| author | swedebugia <swedebugia@riseup.net> | 2018-12-09 02:12:07 +0100 |
|---|---|---|
| committer | swedebugia <swedebugia@riseup.net> | 2018-12-09 02:12:07 +0100 |
| commit | 057571f977bf3132820e48f958110f31d331fb55 (patch) | |
| tree | 610af4e8277892e415c9e420e76c60d64f255002 /wikidata.scm | |
| download | guile-wikidata-057571f977bf3132820e48f958110f31d331fb55.tar.gz | |
pre-release 0.1:
* COPYING
* README
* wikidata.scm: 11 new procedures.
4 low-level: URI-decorators and fetching.
6 medium-level: extract and format the data.
1 high-level: show the results.
Diffstat (limited to 'wikidata.scm')
| -rw-r--r-- | wikidata.scm | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/wikidata.scm b/wikidata.scm new file mode 100644 index 0000000..8931500 --- /dev/null +++ b/wikidata.scm @@ -0,0 +1,197 @@ +;;; Copyright © 2018 swedebugia <swedebugia@riseup.net> +;;; +;;; This file is part of guile-wikidata. +;;; +;;; guile-wikidata is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; guile-wikidata is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with guile-wikidata. If not, see <http://www.gnu.org/licenses/>. + +;;; See +;;; https://www.mediawiki.org/wiki/API:Presenting_Wikidata_knowledge +;;; for a good description of workflow when integrating Wikidata into +;;; an application. + +(define-module (wikidata) + #:use-module (ice-9 format) + #:use-module (ice-9 optargs) + #:use-module (json) + #:use-module (guix import json) + #:use-module (srfi srfi-1) + #:use-module (web uri) + #:export (show)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Low-level proc. +;; URI-decorators and fetching +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;TODO implement caching +(define (wdquery uri) + "Fetch the data, return an alist" + (json-fetch-alist uri)) + +;; Inspired by PYPI wikidata_suggest +(define* (search-uri name + #:key + (format "json") + (language "en") + (type "item") + (continue "0") + (limit "10")) + + "Build URI for the Wikidata wbsearchentities API." + (let ((url "https://www.wikidata.org/w/api.php") + (& "&") + (= "=") + (action "wbsearchentities")) + (string->uri + (string-append url "?" + "search" = name & + "action" = action & + "format" = format & + "language" = language & + "type" = type & + "continue" = continue & + "limit" = limit + )))) + +;; Inspired by +;; https://opendata.stackexchange.com/questions/5248/how-to-get-the-name-of-a-wikidata-item +;; TODO add handling of more than one qid. +(define* (getentities-uri qid + #:optional property + #:key (language 'en) + (format 'json)) + "Build URI for the Wikidata wbsearchintities API." + (let* ((url "https://www.wikidata.org/w/api.php") + (& "&") + (= "=") + (u (string-append url "?" + "ids" = qid & + "action" = "wbgetentities" & + "format" = (symbol->string format) & + "language" = (symbol->string language)))) + (string->uri + (if (symbol? property) + (let ((property (symbol->string property))) + (string-append u & "props" = property)) + ;; No property + u)))) + +;; Only one at a time. +(define* (getclaims-uri qid + #:key (format 'json)) + "Build URI for the Wikidata wbgetclaims API." + (let* ((url "https://www.wikidata.org/w/api.php") + (& "&") + (= "=") + (u (string-append url "?" + "entity" = qid & + "action" = "wbgetclaims" & + "format" = (symbol->string format)))) + (string->uri u))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Medium-level +;; Extract from queries +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (get-label qid) + (let ((l "label")) + (first + (extract-all + (wdquery (getentities-uri qid l)))) + )) + +(define (get-properties qid) + "Fetches and RETURNS list of all P-property keys from a qid" + (map car ;show all P only first P-statement + (cdr ; ->((p1)(p2(...)))) = list of properties + (first ; ->(claims ((p1)(p2(...)))) + (wdquery + (getclaims-uri qid)))))) + +;;test +;;(display (map getlabel (get-properties "Q180736"))) + +;; TODO factorize nested alist check +(define* (extract-element alist element + #:optional (x 30)) + "Accept unnested ALIST and return the value of ELEMENT. +'qid. Truncate elements to X char, default to 30 char." + (if (if (member element '("label" "description" "id")) #t #f) + ;; True + (if (list? alist) + (let ((result (assoc-ref alist element))) + (if (string? result) + ;; Truncate string + (if (> (string-length result) x) + (string-append (substring result 0 x) "...") + result) + (if (null? result) + (begin + (error "extract-element: No" element "found:") + (display alist)) + (string-append + "(No " element " in the database)")))) + (begin + (error "extract-element: Not a proper list:" ) + (display alist))) + ;; Not one of the accepted elements + (error "extract-element: accepts only the strings: label, description or id"))) + +(define (extract-all alist) + "Extract all elements for a given unnested alist" + (if (list? alist) + (if (not (= 0 (length alist))) + `(("label" . ,(extract-element alist "label")) + ("description" . ,(extract-element alist "description")) + ("id" . ,(extract-element alist "id"))) + (error "extract-all: Nothing found." )) + (begin + (error "extract-all: Not a proper list:") + (display alist)))) + +(define (pretty-print result) + "Takes an unnested alist RESULT and pretty prints it." + (format #t "~a:\t~a~%" + (extract-element result "id") + ;; Join and truncate long labels and descriptions + (let* ((l (extract-element result "label")) + (d (extract-element result "description")) + (ld (string-append l ": " d)) + (x 50)) + (if (> (string-length ld) x) + (string-append (substring ld 0 x) "...") + ld + )))) + +(define (extract-search name) + "Returns list with each element being an alist of label, desc, qid" + (map extract-all (assoc-ref (wdquery (search-uri name)) "search"))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; High-level +;; Get the results fast +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (search query x) + "Extract first x results and print them in a pretty truncated way." + (let ((result (extract-search query))) + (begin + (format #t "First ~a:\tLabel & Description~%" x) + (map pretty-print (take result x)) + ) + )) + +;; For example: +;; (search "aragorn" 10) |
