summaryrefslogtreecommitdiff
path: root/wikidata.scm
diff options
context:
space:
mode:
Diffstat (limited to 'wikidata.scm')
-rw-r--r--wikidata.scm197
1 files changed, 197 insertions, 0 deletions
diff --git a/wikidata.scm b/wikidata.scm
new file mode 100644
index 0000000..8931500
--- /dev/null
+++ b/wikidata.scm
@@ -0,0 +1,197 @@
+;;; Copyright © 2018 swedebugia <swedebugia@riseup.net>
+;;;
+;;; This file is part of guile-wikidata.
+;;;
+;;; guile-wikidata is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; guile-wikidata is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with guile-wikidata. If not, see <http://www.gnu.org/licenses/>.
+
+;;; See
+;;; https://www.mediawiki.org/wiki/API:Presenting_Wikidata_knowledge
+;;; for a good description of workflow when integrating Wikidata into
+;;; an application.
+
+(define-module (wikidata)
+ #:use-module (ice-9 format)
+ #:use-module (ice-9 optargs)
+ #:use-module (json)
+ #:use-module (guix import json)
+ #:use-module (srfi srfi-1)
+ #:use-module (web uri)
+ #:export (show))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Low-level proc.
+;; URI-decorators and fetching
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;TODO implement caching
+(define (wdquery uri)
+ "Fetch the data, return an alist"
+ (json-fetch-alist uri))
+
+;; Inspired by PYPI wikidata_suggest
+(define* (search-uri name
+ #:key
+ (format "json")
+ (language "en")
+ (type "item")
+ (continue "0")
+ (limit "10"))
+
+ "Build URI for the Wikidata wbsearchentities API."
+ (let ((url "https://www.wikidata.org/w/api.php")
+ (& "&")
+ (= "=")
+ (action "wbsearchentities"))
+ (string->uri
+ (string-append url "?"
+ "search" = name &
+ "action" = action &
+ "format" = format &
+ "language" = language &
+ "type" = type &
+ "continue" = continue &
+ "limit" = limit
+ ))))
+
+;; Inspired by
+;; https://opendata.stackexchange.com/questions/5248/how-to-get-the-name-of-a-wikidata-item
+;; TODO add handling of more than one qid.
+(define* (getentities-uri qid
+ #:optional property
+ #:key (language 'en)
+ (format 'json))
+ "Build URI for the Wikidata wbsearchintities API."
+ (let* ((url "https://www.wikidata.org/w/api.php")
+ (& "&")
+ (= "=")
+ (u (string-append url "?"
+ "ids" = qid &
+ "action" = "wbgetentities" &
+ "format" = (symbol->string format) &
+ "language" = (symbol->string language))))
+ (string->uri
+ (if (symbol? property)
+ (let ((property (symbol->string property)))
+ (string-append u & "props" = property))
+ ;; No property
+ u))))
+
+;; Only one at a time.
+(define* (getclaims-uri qid
+ #:key (format 'json))
+ "Build URI for the Wikidata wbgetclaims API."
+ (let* ((url "https://www.wikidata.org/w/api.php")
+ (& "&")
+ (= "=")
+ (u (string-append url "?"
+ "entity" = qid &
+ "action" = "wbgetclaims" &
+ "format" = (symbol->string format))))
+ (string->uri u)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Medium-level
+;; Extract from queries
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (get-label qid)
+ (let ((l "label"))
+ (first
+ (extract-all
+ (wdquery (getentities-uri qid l))))
+ ))
+
+(define (get-properties qid)
+ "Fetches and RETURNS list of all P-property keys from a qid"
+ (map car ;show all P only first P-statement
+ (cdr ; ->((p1)(p2(...)))) = list of properties
+ (first ; ->(claims ((p1)(p2(...))))
+ (wdquery
+ (getclaims-uri qid))))))
+
+;;test
+;;(display (map getlabel (get-properties "Q180736")))
+
+;; TODO factorize nested alist check
+(define* (extract-element alist element
+ #:optional (x 30))
+ "Accept unnested ALIST and return the value of ELEMENT.
+'qid. Truncate elements to X char, default to 30 char."
+ (if (if (member element '("label" "description" "id")) #t #f)
+ ;; True
+ (if (list? alist)
+ (let ((result (assoc-ref alist element)))
+ (if (string? result)
+ ;; Truncate string
+ (if (> (string-length result) x)
+ (string-append (substring result 0 x) "...")
+ result)
+ (if (null? result)
+ (begin
+ (error "extract-element: No" element "found:")
+ (display alist))
+ (string-append
+ "(No " element " in the database)"))))
+ (begin
+ (error "extract-element: Not a proper list:" )
+ (display alist)))
+ ;; Not one of the accepted elements
+ (error "extract-element: accepts only the strings: label, description or id")))
+
+(define (extract-all alist)
+ "Extract all elements for a given unnested alist"
+ (if (list? alist)
+ (if (not (= 0 (length alist)))
+ `(("label" . ,(extract-element alist "label"))
+ ("description" . ,(extract-element alist "description"))
+ ("id" . ,(extract-element alist "id")))
+ (error "extract-all: Nothing found." ))
+ (begin
+ (error "extract-all: Not a proper list:")
+ (display alist))))
+
+(define (pretty-print result)
+ "Takes an unnested alist RESULT and pretty prints it."
+ (format #t "~a:\t~a~%"
+ (extract-element result "id")
+ ;; Join and truncate long labels and descriptions
+ (let* ((l (extract-element result "label"))
+ (d (extract-element result "description"))
+ (ld (string-append l ": " d))
+ (x 50))
+ (if (> (string-length ld) x)
+ (string-append (substring ld 0 x) "...")
+ ld
+ ))))
+
+(define (extract-search name)
+ "Returns list with each element being an alist of label, desc, qid"
+ (map extract-all (assoc-ref (wdquery (search-uri name)) "search")))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; High-level
+;; Get the results fast
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define (search query x)
+ "Extract first x results and print them in a pretty truncated way."
+ (let ((result (extract-search query)))
+ (begin
+ (format #t "First ~a:\tLabel & Description~%" x)
+ (map pretty-print (take result x))
+ )
+ ))
+
+;; For example:
+;; (search "aragorn" 10)